diff --git a/.env.template b/.env.template
index fa873b0bd..b9c9ef94b 100644
--- a/.env.template
+++ b/.env.template
@@ -6,6 +6,38 @@ RUN_CANCEL_KEY_TTL_SECONDS=1800
 LANGGRAPH_CHECKPOINTER_BACKEND=postgres
 VITE_USE_RUNS_API=false
+# Sandbox (DeerFlow-style provisioner)
+SANDBOX_PROVIDER=provisioner
+SANDBOX_PROVISIONER_URL=http://sandbox-provisioner:8002
+SANDBOX_VIRTUAL_PATH_PREFIX=/mnt/user-data
+SANDBOX_EXEC_TIMEOUT_SECONDS=180
+SANDBOX_MAX_OUTPUT_BYTES=262144
+SANDBOX_KEEPALIVE_INTERVAL_SECONDS=30
+SANDBOX_IDLE_TIMEOUT_SECONDS=120
+SANDBOX_IDLE_CHECK_INTERVAL_SECONDS=10
+# sandbox-provisioner backend: memory | local | docker | kubernetes
+SANDBOX_PROVISIONER_BACKEND=local
+# local/docker backend defaults (DeerFlow local_backend style)
+# SANDBOX_CONTAINER_PORT=8080
+# SANDBOX_DOCKER_SANDBOX_HOST=host.docker.internal
+# Docker provisioner options (used when SANDBOX_PROVISIONER_BACKEND=local/docker)
+# SANDBOX_DOCKER_NETWORK=yuxi-know_app-network
+# SANDBOX_DOCKER_THREADS_HOST_PATH=
+# SANDBOX_DOCKER_SKILLS_HOST_PATH=
+# SANDBOX_DOCKER_SANDBOX_PREFIX=yuxi-sandbox
+# Optional proxy for sandbox-provisioner container
+# SANDBOX_HTTP_PROXY=http://host.docker.internal:7897
+# SANDBOX_HTTPS_PROXY=http://host.docker.internal:7897
+# K8s provisioner options (used when SANDBOX_PROVISIONER_BACKEND=kubernetes)
+# SANDBOX_K8S_NAMESPACE=yuxi-know
+# SANDBOX_IMAGE=enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest
+# SANDBOX_SKILLS_HOST_PATH=/app/saves/skills
+# SANDBOX_THREADS_HOST_PATH=/app/saves/threads
+# SANDBOX_NODE_HOST=host.docker.internal
+# KUBECONFIG_PATH=/root/.kube/config
+# Memory backend sandbox URL template (supports {sandbox_id})
+# MEMORY_SANDBOX_URL_TEMPLATE=http://agent-sandbox:8000
+
 # region model_provider
 SILICONFLOW_API_KEY=     # 推荐使用硅基流动免费服务 https://cloud.siliconflow.cn/i/Eo5yTHGJ
 TAVILY_API_KEY=          # 获取搜索服务的 api key 请访问 https://app.tavily.com/
diff --git a/AGENTS.md b/AGENTS.md
index 3706ab943..269378772 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -37,7 +37,7 @@ make format  # 格式化代码
 # 直接在容器内执行命令
 docker compose exec api uv run python test/your_script.py  # 放在 test 文件夹
 ```
-
+安装依赖太慢时,可以使用代理端口 7897。
 注意:
 - Python 代码要符合 Python 的规范,符合 pythonic 风格
 - 尽量使用较新的语法,避免使用旧版本的语法(版本兼容到 3.12+)
diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index 4023ae8f0..f4c103156 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -32,8 +32,13 @@ services:
       - MINIO_URI=${MINIO_URI:-http://milvus-minio:9000}
       - MODEL_DIR_IN_DOCKER=/models
       - RUNNING_IN_DOCKER=true
-      - NO_PROXY=localhost,127.0.0.1,milvus,graph,milvus-minio,milvus-etcd,etcd,minio,mineru,paddlex,api.siliconflow.cn
-      - no_proxy=localhost,127.0.0.1,milvus,graph,milvus-minio,milvus-etcd,etcd,minio,mineru,paddlex,api.siliconflow.cn
+      - SANDBOX_PROVIDER=${SANDBOX_PROVIDER:-provisioner}
+      - SANDBOX_PROVISIONER_URL=${SANDBOX_PROVISIONER_URL:-http://sandbox-provisioner:8002}
+      - SANDBOX_VIRTUAL_PATH_PREFIX=${SANDBOX_VIRTUAL_PATH_PREFIX:-/mnt/user-data}
+      - SANDBOX_EXEC_TIMEOUT_SECONDS=${SANDBOX_EXEC_TIMEOUT_SECONDS:-180}
+      - SANDBOX_MAX_OUTPUT_BYTES=${SANDBOX_MAX_OUTPUT_BYTES:-262144}
+      - NO_PROXY=localhost,127.0.0.1,host.docker.internal,milvus,graph,milvus-minio,milvus-etcd,etcd,minio,mineru,paddlex,sandbox-provisioner,api.siliconflow.cn
+      - no_proxy=localhost,127.0.0.1,host.docker.internal,milvus,graph,milvus-minio,milvus-etcd,etcd,minio,mineru,paddlex,sandbox-provisioner,api.siliconflow.cn
     command: uv run --no-dev uvicorn server.main:app --host 0.0.0.0 --port 5050
     restart: unless-stopped
    healthcheck:
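The variables above wire the API and worker containers to the provisioner contract implemented in `docker/sandbox_provisioner/app.py` later in this diff. A minimal sketch of a client round-trip against that API (the `demo`/`t-1` identifiers are illustrative; port 8002 is the host mapping added in `docker-compose.yml`):

```python
import json
from urllib import request

BASE = "http://localhost:8002"  # host-mapped provisioner port from docker-compose.yml

def call(method: str, path: str, payload: dict | None = None) -> dict:
    data = json.dumps(payload).encode() if payload is not None else None
    req = request.Request(f"{BASE}{path}", data=data, method=method,
                          headers={"Content-Type": "application/json"})
    with request.urlopen(req, timeout=10) as resp:
        return json.loads(resp.read())

box = call("POST", "/api/sandboxes", {"sandbox_id": "demo", "thread_id": "t-1"})
print(box["sandbox_url"])                  # e.g. http://host.docker.internal:<host-port>
call("POST", "/api/sandboxes/demo/touch")  # reset the idle-reaper clock
call("DELETE", "/api/sandboxes/demo")      # stop and remove the sandbox container
```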
@@ -51,6 +56,8 @@ services:
         condition: service_healthy
       minio:
         condition: service_healthy
+      sandbox-provisioner:
+        condition: service_healthy

   worker:
     build:
@@ -85,8 +92,13 @@ services:
       - MINIO_URI=${MINIO_URI:-http://milvus-minio:9000}
       - MODEL_DIR_IN_DOCKER=/models
       - RUNNING_IN_DOCKER=true
-      - NO_PROXY=localhost,127.0.0.1,milvus,graph,milvus-minio,milvus-etcd,etcd,minio,mineru,paddlex,api.siliconflow.cn
-      - no_proxy=localhost,127.0.0.1,milvus,graph,milvus-minio,milvus-etcd,etcd,minio,mineru,paddlex,api.siliconflow.cn
+      - SANDBOX_PROVIDER=${SANDBOX_PROVIDER:-provisioner}
+      - SANDBOX_PROVISIONER_URL=${SANDBOX_PROVISIONER_URL:-http://sandbox-provisioner:8002}
+      - SANDBOX_VIRTUAL_PATH_PREFIX=${SANDBOX_VIRTUAL_PATH_PREFIX:-/mnt/user-data}
+      - SANDBOX_EXEC_TIMEOUT_SECONDS=${SANDBOX_EXEC_TIMEOUT_SECONDS:-180}
+      - SANDBOX_MAX_OUTPUT_BYTES=${SANDBOX_MAX_OUTPUT_BYTES:-262144}
+      - NO_PROXY=localhost,127.0.0.1,host.docker.internal,milvus,graph,milvus-minio,milvus-etcd,etcd,minio,mineru,paddlex,sandbox-provisioner,api.siliconflow.cn
+      - no_proxy=localhost,127.0.0.1,host.docker.internal,milvus,graph,milvus-minio,milvus-etcd,etcd,minio,mineru,paddlex,sandbox-provisioner,api.siliconflow.cn
     command: uv run --no-dev arq server.worker_main.WorkerSettings
     restart: unless-stopped
     depends_on:
@@ -98,6 +110,46 @@ services:
         condition: service_healthy
       minio:
         condition: service_healthy
+      sandbox-provisioner:
+        condition: service_healthy
+
+  sandbox-provisioner:
+    build:
+      context: ./docker/sandbox_provisioner
+      dockerfile: Dockerfile
+    container_name: sandbox-provisioner
+    volumes:
+      - ./saves:/app/saves
+      - /var/run/docker.sock:/var/run/docker.sock
+    networks:
+      - app-network
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    environment:
+      - PROVISIONER_BACKEND=${SANDBOX_PROVISIONER_BACKEND:-local}
+      - K8S_NAMESPACE=${SANDBOX_K8S_NAMESPACE:-yuxi-know}
+      - SANDBOX_IMAGE=${SANDBOX_IMAGE:-enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest}
+      - SANDBOX_CONTAINER_PORT=${SANDBOX_CONTAINER_PORT:-8080}
+      - SKILLS_HOST_PATH=${SANDBOX_SKILLS_HOST_PATH:-/app/saves/skills}
+      - THREADS_HOST_PATH=${SANDBOX_THREADS_HOST_PATH:-/app/saves/threads}
+      - NODE_HOST=${SANDBOX_NODE_HOST:-host.docker.internal}
+      - KUBECONFIG_PATH=${KUBECONFIG_PATH:-}
+      - MEMORY_SANDBOX_URL_TEMPLATE=${MEMORY_SANDBOX_URL_TEMPLATE:-http://agent-sandbox:8000}
+      - DOCKER_NETWORK=${SANDBOX_DOCKER_NETWORK:-yuxi-know_app-network}
+      - DOCKER_THREADS_HOST_PATH=${SANDBOX_DOCKER_THREADS_HOST_PATH:-}
+      - DOCKER_SKILLS_HOST_PATH=${SANDBOX_DOCKER_SKILLS_HOST_PATH:-}
+      - DOCKER_SANDBOX_PREFIX=${SANDBOX_DOCKER_SANDBOX_PREFIX:-yuxi-sandbox}
+      - DOCKER_SANDBOX_HOST=${SANDBOX_DOCKER_SANDBOX_HOST:-host.docker.internal}
+      - SANDBOX_IDLE_TIMEOUT_SECONDS=${SANDBOX_IDLE_TIMEOUT_SECONDS:-120}
+      - SANDBOX_IDLE_CHECK_INTERVAL_SECONDS=${SANDBOX_IDLE_CHECK_INTERVAL_SECONDS:-10}
+      - SANDBOX_EXEC_TIMEOUT_SECONDS=${SANDBOX_EXEC_TIMEOUT_SECONDS:-180}
+    healthcheck:
+      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8002/health').read()"]
+      interval: 10s
+      timeout: 5s
+      retries: 6
+      start_period: 10s
+    restart: unless-stopped

   web:
     build:
diff --git a/docker-compose.yml b/docker-compose.yml
index 46b03b0f5..08029b745 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -3,7 +3,7 @@ services:
     build:
       context: .
      dockerfile: docker/api.Dockerfile
-    image: yuxi-api:0.5.dev
+    image: yuxi-api:0.5.2.dev
     container_name: api-dev
     working_dir: /app
     volumes:
@@ -41,8 +41,13 @@ services:
       - MINIO_URI=${MINIO_URI:-http://milvus-minio:9000}
       - MODEL_DIR_IN_DOCKER=/models
       - RUNNING_IN_DOCKER=true
-      - NO_PROXY=localhost,127.0.0.1,milvus,graph,milvus-minio,milvus-etcd-dev,etcd,minio,mineru,paddlex,api.siliconflow.cn
-      - no_proxy=localhost,127.0.0.1,milvus,graph,milvus-minio,milvus-etcd-dev,etcd,minio,mineru,paddlex,api.siliconflow.cn
+      - SANDBOX_PROVIDER=${SANDBOX_PROVIDER:-provisioner}
+      - SANDBOX_PROVISIONER_URL=${SANDBOX_PROVISIONER_URL:-http://sandbox-provisioner:8002}
+      - SANDBOX_VIRTUAL_PATH_PREFIX=${SANDBOX_VIRTUAL_PATH_PREFIX:-/mnt/user-data}
+      - SANDBOX_EXEC_TIMEOUT_SECONDS=${SANDBOX_EXEC_TIMEOUT_SECONDS:-180}
+      - SANDBOX_MAX_OUTPUT_BYTES=${SANDBOX_MAX_OUTPUT_BYTES:-262144}
+      - NO_PROXY=localhost,127.0.0.1,host.docker.internal,milvus,graph,milvus-minio,milvus-etcd-dev,etcd,minio,mineru,paddlex,sandbox-provisioner,api.siliconflow.cn
+      - no_proxy=localhost,127.0.0.1,host.docker.internal,milvus,graph,milvus-minio,milvus-etcd-dev,etcd,minio,mineru,paddlex,sandbox-provisioner,api.siliconflow.cn
       # endregion api_envs
     command: uv run --no-dev uvicorn server.main:app --host 0.0.0.0 --port 5050 --reload
     restart: unless-stopped
@@ -61,12 +66,14 @@ services:
         condition: service_healthy
       minio:
         condition: service_healthy
+      sandbox-provisioner:
+        condition: service_healthy

   worker:
     build:
       context: .
       dockerfile: docker/api.Dockerfile
-    image: yuxi-api:0.5.dev
+    image: yuxi-api:0.5.2.dev
     container_name: worker-dev
     working_dir: /app
     volumes:
@@ -103,8 +110,13 @@ services:
       - MINIO_URI=${MINIO_URI:-http://milvus-minio:9000}
       - MODEL_DIR_IN_DOCKER=/models
       - RUNNING_IN_DOCKER=true
-      - NO_PROXY=localhost,127.0.0.1,milvus,graph,milvus-minio,milvus-etcd-dev,etcd,minio,mineru,paddlex,api.siliconflow.cn
-      - no_proxy=localhost,127.0.0.1,milvus,graph,milvus-minio,milvus-etcd-dev,etcd,minio,mineru,paddlex,api.siliconflow.cn
+      - SANDBOX_PROVIDER=${SANDBOX_PROVIDER:-provisioner}
+      - SANDBOX_PROVISIONER_URL=${SANDBOX_PROVISIONER_URL:-http://sandbox-provisioner:8002}
+      - SANDBOX_VIRTUAL_PATH_PREFIX=${SANDBOX_VIRTUAL_PATH_PREFIX:-/mnt/user-data}
+      - SANDBOX_EXEC_TIMEOUT_SECONDS=${SANDBOX_EXEC_TIMEOUT_SECONDS:-180}
+      - SANDBOX_MAX_OUTPUT_BYTES=${SANDBOX_MAX_OUTPUT_BYTES:-262144}
+      - NO_PROXY=localhost,127.0.0.1,host.docker.internal,milvus,graph,milvus-minio,milvus-etcd-dev,etcd,minio,mineru,paddlex,sandbox-provisioner,api.siliconflow.cn
+      - no_proxy=localhost,127.0.0.1,host.docker.internal,milvus,graph,milvus-minio,milvus-etcd-dev,etcd,minio,mineru,paddlex,sandbox-provisioner,api.siliconflow.cn
     command: uv run --no-dev arq server.worker_main.WorkerSettings
     restart: unless-stopped
     depends_on:
@@ -116,13 +128,63 @@ services:
         condition: service_healthy
       minio:
         condition: service_healthy
+      sandbox-provisioner:
+        condition: service_healthy
+
+  sandbox-provisioner:
+    build:
+      context: ./docker/sandbox_provisioner
+      dockerfile: Dockerfile
+    image: yuxi-sandbox-provisioner:0.5.2.dev
+    container_name: sandbox-provisioner
+    volumes:
+      - ./saves:/app/saves
+      - ./docker/sandbox_provisioner/app.py:/app/app.py:ro
+      - /var/run/docker.sock:/var/run/docker.sock
+    ports:
+      - "8002:8002"
+    networks:
+      - app-network
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    environment:
+      - PROVISIONER_BACKEND=${SANDBOX_PROVISIONER_BACKEND:-local}
+      - K8S_NAMESPACE=${SANDBOX_K8S_NAMESPACE:-yuxi-know}
+      - SANDBOX_IMAGE=${SANDBOX_IMAGE:-enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest}
+      - SANDBOX_CONTAINER_PORT=${SANDBOX_CONTAINER_PORT:-8080}
+      - SKILLS_HOST_PATH=${SANDBOX_SKILLS_HOST_PATH:-/app/saves/skills}
+      - THREADS_HOST_PATH=${SANDBOX_THREADS_HOST_PATH:-/app/saves/threads}
+      - NODE_HOST=${SANDBOX_NODE_HOST:-host.docker.internal}
+      - KUBECONFIG_PATH=${KUBECONFIG_PATH:-}
+      - MEMORY_SANDBOX_URL_TEMPLATE=${MEMORY_SANDBOX_URL_TEMPLATE:-http://agent-sandbox:8000}
+      - HTTP_PROXY=${SANDBOX_HTTP_PROXY:-}
+      - HTTPS_PROXY=${SANDBOX_HTTPS_PROXY:-}
+      - NO_PROXY=localhost,127.0.0.1,host.docker.internal
+      - DOCKER_NETWORK=${SANDBOX_DOCKER_NETWORK:-yuxi-know_app-network}
+      - DOCKER_THREADS_HOST_PATH=${SANDBOX_DOCKER_THREADS_HOST_PATH:-}
+      - DOCKER_SKILLS_HOST_PATH=${SANDBOX_DOCKER_SKILLS_HOST_PATH:-}
+      - DOCKER_SANDBOX_PREFIX=${SANDBOX_DOCKER_SANDBOX_PREFIX:-yuxi-sandbox}
+      - DOCKER_SANDBOX_HOST=${SANDBOX_DOCKER_SANDBOX_HOST:-host.docker.internal}
+      - SANDBOX_IDLE_TIMEOUT_SECONDS=${SANDBOX_IDLE_TIMEOUT_SECONDS:-120}
+      - SANDBOX_IDLE_CHECK_INTERVAL_SECONDS=${SANDBOX_IDLE_CHECK_INTERVAL_SECONDS:-10}
+      - SANDBOX_EXEC_TIMEOUT_SECONDS=${SANDBOX_EXEC_TIMEOUT_SECONDS:-180}
+    command: >
+      sh -lc "python -c 'import docker' >/dev/null 2>&1 || pip install --no-cache-dir 'docker>=7.1.0';
+      uvicorn app:app --host 0.0.0.0 --port 8002"
+    healthcheck:
+      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8002/health').read()"]
+      interval: 10s
+      timeout: 5s
+      retries: 6
+      start_period: 10s
+    restart: unless-stopped

   web:
     build:
       context: .
       dockerfile: docker/web.Dockerfile
       target: development
-    image: yuxi-web:0.5.dev
+    image: yuxi-web:0.5.2.dev
     container_name: web-dev
     volumes:
       - ./web/src:/app/src
@@ -265,6 +327,8 @@ services:
   redis:
     image: redis:7-alpine
     container_name: redis
+    ports:
+      - "6379:6379"
     command: redis-server --appendonly yes
     healthcheck:
       test: ["CMD", "redis-cli", "ping"]
diff --git a/docker/sandbox_provisioner/Dockerfile b/docker/sandbox_provisioner/Dockerfile
new file mode 100644
index 000000000..0a37ea7be
--- /dev/null
+++ b/docker/sandbox_provisioner/Dockerfile
@@ -0,0 +1,15 @@
+FROM python:3.12-slim
+
+WORKDIR /app
+
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
+
+COPY requirements.txt /app/requirements.txt
+RUN pip install --no-cache-dir -r /app/requirements.txt
+
+COPY app.py /app/app.py
+
+EXPOSE 8002
+
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8002"]
diff --git a/docker/sandbox_provisioner/app.py b/docker/sandbox_provisioner/app.py
new file mode 100644
index 000000000..217e70de6
--- /dev/null
+++ b/docker/sandbox_provisioner/app.py
@@ -0,0 +1,739 @@
+from __future__ import annotations
+
+import logging
+import os
+import threading
+import time
+from contextlib import asynccontextmanager
+from dataclasses import dataclass
+from pathlib import Path
+from urllib import request
+
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+
+logger = logging.getLogger(__name__)
+
+
+class CreateSandboxRequest(BaseModel):
+    sandbox_id: str
+    thread_id: str
+
+
+class SandboxResponse(BaseModel):
+    sandbox_id: str
+    sandbox_url: str
+    status: str | None = None
+
+
+class DeleteSandboxResponse(BaseModel):
+    ok: bool
+    sandbox_id: str
+
+
+class TouchSandboxResponse(BaseModel):
+    ok: bool
+    sandbox_id: str
+    status: str | None = None
+
+
+class ListSandboxesResponse(BaseModel):
+    sandboxes: list[SandboxResponse]
+    count: int
+
+
+@dataclass(slots=True)
+class SandboxRecord:
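+    # Backend-agnostic snapshot of one sandbox: stable id, reachable base URL, coarse status.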
+    sandbox_id: str
+    sandbox_url: str
+    status: str | None = None
+
+
+class MemoryProvisionerBackend:
+    def __init__(self):
+        self._lock = threading.Lock()
+        self._records: dict[str, SandboxRecord] = {}
+        self._url_template = os.getenv("MEMORY_SANDBOX_URL_TEMPLATE", "http://agent-sandbox:8000")
+
+    def _url_for(self, sandbox_id: str) -> str:
+        template = self._url_template
+        if "{sandbox_id}" in template:
+            return template.format(sandbox_id=sandbox_id)
+        return template
+
+    def create(self, sandbox_id: str, thread_id: str) -> SandboxRecord:
+        _ = thread_id  # unused in memory backend
+        with self._lock:
+            existing = self._records.get(sandbox_id)
+            if existing is not None:
+                return existing
+            record = SandboxRecord(
+                sandbox_id=sandbox_id,
+                sandbox_url=self._url_for(sandbox_id),
+                status="Running",
+            )
+            self._records[sandbox_id] = record
+            return record
+
+    def discover(self, sandbox_id: str) -> SandboxRecord | None:
+        with self._lock:
+            return self._records.get(sandbox_id)
+
+    def list(self) -> list[SandboxRecord]:
+        with self._lock:
+            return list(self._records.values())
+
+    def delete(self, sandbox_id: str) -> None:
+        with self._lock:
+            self._records.pop(sandbox_id, None)
+
+
+def wait_for_sandbox_ready(sandbox_url: str, timeout_seconds: int = 30) -> bool:
+    deadline = time.time() + timeout_seconds
+    opener = request.build_opener(request.ProxyHandler({}))
+    while time.time() < deadline:
+        try:
+            with opener.open(f"{sandbox_url.rstrip('/')}/v1/sandbox", timeout=3) as response:
+                status_code = getattr(response, "status", 200)
+                if status_code == 200:
+                    return True
+        except Exception:
+            pass
+        time.sleep(1)
+    return False
+
+
+class LocalContainerProvisionerBackend:
+    def __init__(self):
+        import docker
+        from docker.errors import DockerException
+
+        self._docker = docker
+        self._lock = threading.Lock()
+        self._container_port = int(os.getenv("SANDBOX_CONTAINER_PORT", "8080"))
+        self._sandbox_image = os.getenv(
+            "SANDBOX_IMAGE",
+            "enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest",
+        )
+        self._network = os.getenv("DOCKER_NETWORK")
+        self._threads_host_path = os.getenv("DOCKER_THREADS_HOST_PATH")
+        self._skills_host_path = os.getenv("DOCKER_SKILLS_HOST_PATH")
+        self._container_prefix = os.getenv("DOCKER_SANDBOX_PREFIX", "yuxi-sandbox")
+        self._sandbox_host = os.getenv("DOCKER_SANDBOX_HOST", "host.docker.internal")
+        self._health_timeout_seconds = int(os.getenv("SANDBOX_HEALTH_TIMEOUT_SECONDS", "30"))
+
+        try:
+            self._client = docker.from_env()
+            self._client.ping()
+        except DockerException as exc:
+            raise RuntimeError(f"docker backend unavailable: {exc}") from exc
+
+        self._resolve_host_paths()
+
+    @staticmethod
+    def _validate_thread_id(thread_id: str) -> str:
+        candidate = str(thread_id or "").strip()
+        if not candidate:
+            raise ValueError("thread_id is required")
+        if any(ch in candidate for ch in ("/", "\\", "\x00")):
+            raise ValueError("thread_id must be a single safe path segment")
+        if candidate in {".", ".."} or ".." in candidate:
+            raise ValueError("thread_id contains invalid path traversal sequence")
+        return candidate
+
+    @staticmethod
+    def _sanitize_id(value: str) -> str:
+        sanitized = "".join(ch if ch.isalnum() or ch in "-_" else "-" for ch in value.strip().lower())
+        return sanitized[:48] or "sandbox"
+
+    def _container_name(self, sandbox_id: str) -> str:
+        return f"{self._container_prefix}-{self._sanitize_id(sandbox_id)}"
+
+    def _resolve_host_paths(self) -> None:
+        if self._threads_host_path and self._skills_host_path:
+            return
+
+        container_id = os.getenv("HOSTNAME", "").strip()
+        if not container_id:
+            raise RuntimeError("HOSTNAME is required to infer docker backend host paths")
+
+        inspected = self._client.api.inspect_container(container_id)
+        mounts = inspected.get("Mounts") or []
+
+        saves_source = None
+        for mount in mounts:
+            destination = (mount.get("Destination") or "").rstrip("/")
+            if destination == "/app/saves":
+                saves_source = mount.get("Source")
+                break
+
+        if not saves_source:
+            raise RuntimeError("cannot infer host path for /app/saves mount")
+
+        base = Path(saves_source)
+        if not self._threads_host_path:
+            self._threads_host_path = str(base / "threads")
+        if not self._skills_host_path:
+            self._skills_host_path = str(base / "skills")
+
+    def _host_port_for(self, container) -> int | None:
+        ports = (container.attrs.get("NetworkSettings") or {}).get("Ports") or {}
+        bindings = ports.get(f"{self._container_port}/tcp")
+        if not bindings:
+            return None
+        host_port = bindings[0].get("HostPort")
+        if not host_port:
+            return None
+        return int(host_port)
+
+    def _sandbox_url(self, host_port: int) -> str:
+        return f"http://{self._sandbox_host}:{host_port}"
+
+    def _to_record(self, container, sandbox_id: str) -> SandboxRecord:
+        state = (container.attrs.get("State") or {}).get("Status")
+        host_port = self._host_port_for(container)
+        sandbox_url = self._sandbox_url(host_port) if host_port is not None else ""
+        return SandboxRecord(
+            sandbox_id=sandbox_id,
+            sandbox_url=sandbox_url,
+            status=state or "unknown",
+        )
+
+    @staticmethod
+    def _ensure_user_data_writable(container) -> None:
+        cmd = (
+            "sh -lc "
+            '"mkdir -p /home/gem/user-data/workspace /home/gem/user-data/uploads /home/gem/user-data/outputs '
+            '&& chmod -R a+rwX /home/gem/user-data"'
+        )
+        result = container.exec_run(cmd, user="0:0")
+        if result.exit_code != 0:
+            output = (
+                result.output.decode("utf-8", errors="ignore")
+                if isinstance(result.output, bytes)
+                else str(result.output)
+            )
+            raise RuntimeError(f"failed to ensure writable thread user-data mount: {output}")
+
+    def _get_container(self, sandbox_id: str):
+        from docker.errors import NotFound
+
+        name = self._container_name(sandbox_id)
+        try:
+            return self._client.containers.get(name)
+        except NotFound:
+            return None
+
+    def create(self, sandbox_id: str, thread_id: str) -> SandboxRecord:
+        with self._lock:
+            safe_thread_id = self._validate_thread_id(thread_id)
+            existing = self._get_container(sandbox_id)
+            if existing is not None:
+                if existing.status != "running":
+                    existing.start()
+                    existing.reload()
+                self._ensure_user_data_writable(existing)
+                record = self._to_record(existing, sandbox_id)
+                if not record.sandbox_url:
+                    raise RuntimeError(f"sandbox {sandbox_id} has no mapped host port")
+                if not wait_for_sandbox_ready(record.sandbox_url, timeout_seconds=self._health_timeout_seconds):
+                    raise RuntimeError(f"sandbox {sandbox_id} is not ready at {record.sandbox_url}")
+                return record
+
+            threads_root = Path(self._threads_host_path).resolve()
+            thread_user_data = (threads_root / safe_thread_id / "user-data").resolve()
+            try:
+                thread_user_data.relative_to(threads_root)
+            except ValueError as exc:
+                raise ValueError("thread_id resolved outside threads host root") from exc
+            thread_user_data.mkdir(parents=True, exist_ok=True)
+
+            skills_path = Path(self._skills_host_path)
+            skills_path.mkdir(parents=True, exist_ok=True)
+
+            container_name = self._container_name(sandbox_id)
+            run_kwargs = {
+                "name": container_name,
+                "detach": True,
+                "labels": {
+                    "app": "yuxi-sandbox",
+                    "sandbox-id": sandbox_id,
+                    "thread-id": thread_id,
+                    "managed-by": "yuxi-sandbox-provisioner",
+                },
+                "volumes": {
+                    str(thread_user_data): {"bind": "/home/gem/user-data", "mode": "rw"},
+                    str(skills_path): {"bind": "/skills", "mode": "ro"},
+                },
+                "ports": {f"{self._container_port}/tcp": None},
+                "security_opt": ["seccomp=unconfined"],
+            }
+            if self._network:
+                run_kwargs["network"] = self._network
+
+            container = self._client.containers.run(self._sandbox_image, **run_kwargs)
+            container.reload()
+            self._ensure_user_data_writable(container)
+            record = self._to_record(container, sandbox_id)
+            if not record.sandbox_url:
+                raise RuntimeError(f"sandbox {sandbox_id} has no mapped host port")
+            if not wait_for_sandbox_ready(record.sandbox_url, timeout_seconds=self._health_timeout_seconds):
+                raise RuntimeError(f"sandbox {sandbox_id} is not ready at {record.sandbox_url}")
+            return record
+
+    def discover(self, sandbox_id: str) -> SandboxRecord | None:
+        container = self._get_container(sandbox_id)
+        if container is None:
+            return None
+        container.reload()
+        record = self._to_record(container, sandbox_id)
+        if not record.sandbox_url:
+            return None
+        if not wait_for_sandbox_ready(record.sandbox_url, timeout_seconds=5):
+            return None
+        return record
+
+    def list(self) -> list[SandboxRecord]:
+        containers = self._client.containers.list(
+            all=True, filters={"label": ["app=yuxi-sandbox", "managed-by=yuxi-sandbox-provisioner"]}
+        )
+        records: list[SandboxRecord] = []
+        for container in containers:
+            labels = container.labels or {}
+            sandbox_id = labels.get("sandbox-id")
+            if sandbox_id:
+                container.reload()
+                records.append(self._to_record(container, sandbox_id))
+        return records
+
+    def delete(self, sandbox_id: str) -> None:
+        container = self._get_container(sandbox_id)
+        if container is None:
+            return
+        if container.status == "running":
+            container.stop(timeout=10)
+        container.remove(v=True, force=True)
+
+
+class KubernetesProvisionerBackend:
+    def __init__(self):
+        from kubernetes import client, config
+
+        self._lock = threading.Lock()
+        self._namespace = os.getenv("K8S_NAMESPACE", "yuxi-know")
+        self._sandbox_image = os.getenv(
+            "SANDBOX_IMAGE",
+            "enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest",
+        )
+        self._skill_pvc = os.getenv("SKILLS_PVC", "yuxi-skills")
+        self._thread_pvc = os.getenv("THREAD_PVC", "yuxi-thread")
+        self._node_host = os.getenv("NODE_HOST", "host.docker.internal")
+        self._container_port = int(os.getenv("SANDBOX_CONTAINER_PORT", "8080"))
+
+        kubeconfig_path = os.getenv("KUBECONFIG_PATH")
+        if kubeconfig_path:
+            config.load_kube_config(config_file=kubeconfig_path)
+        else:
+            try:
+                config.load_incluster_config()
+            except Exception:
+                config.load_kube_config()
+
+        self._core_api = client.CoreV1Api()
+        self._client = client
+
+    @staticmethod
+    def _pod_name(sandbox_id: str) -> str:
+        return f"sandbox-{sandbox_id}"
+
+    @staticmethod
+    def _service_name(sandbox_id: str) -> str:
+        return f"sandbox-{sandbox_id}"
+
+    def _build_pod_spec(self, sandbox_id: str, thread_id: str):
+        pod_name = self._pod_name(sandbox_id)
+        return self._client.V1Pod(
+            metadata=self._client.V1ObjectMeta(
+                name=pod_name,
+                labels={"app": "yuxi-sandbox", "sandbox-id": sandbox_id},
+                annotations={"thread-id": thread_id},
+            ),
+            spec=self._client.V1PodSpec(
+                restart_policy="Never",
+                security_context=self._client.V1PodSecurityContext(
+                    fs_group=0,
+                    run_as_user=0,
+                ),
+                init_containers=[
+                    self._client.V1Container(
+                        name="init-user-data",
+                        image=self._sandbox_image,
+                        command=["sh", "-c"],
+                        args=[
+                            "chmod 777 /home/gem "
+                            "&& mkdir -p /home/gem/user-data/workspace /home/gem/user-data/uploads /home/gem/user-data/outputs "
+                            "&& chmod -R 777 /home/gem/user-data ",
+                        ],
+                        volume_mounts=[
+                            self._client.V1VolumeMount(name="home-dir", mount_path="/home/gem"),
+                            self._client.V1VolumeMount(
+                                name="shared-data",
+                                mount_path="/home/gem/user-data",
+                                sub_path=f"threads/{thread_id}/user-data",
+                            ),
+                        ],
+                    ),
+                ],
+                containers=[
+                    self._client.V1Container(
+                        name="sandbox",
+                        image=self._sandbox_image,
+                        ports=[self._client.V1ContainerPort(container_port=self._container_port)],
+                        volume_mounts=[
+                            self._client.V1VolumeMount(name="home-dir", mount_path="/home/gem"),
+                            self._client.V1VolumeMount(
+                                name="shared-data",
+                                mount_path="/home/gem/user-data",
+                                sub_path=f"threads/{thread_id}/user-data",
+                            ),
+                            self._client.V1VolumeMount(
+                                name="shared-data",
+                                mount_path="/skills",
+                                sub_path="skills",
+                                read_only=False,
+                            ),
+                        ],
+                    )
+                ],
+                volumes=[
+                    self._client.V1Volume(
+                        name="shared-data",
+                        persistent_volume_claim=self._client.V1PersistentVolumeClaimVolumeSource(
+                            claim_name=self._thread_pvc,
+                            read_only=False,
+                        ),
+                    ),
+                    self._client.V1Volume(
+                        name="home-dir",
+                        empty_dir=self._client.V1EmptyDirVolumeSource(),
+                    ),
+                ],
+            ),
+        )
+
+    def _build_service_spec(self, sandbox_id: str):
+        service_name = self._service_name(sandbox_id)
+        return self._client.V1Service(
+            metadata=self._client.V1ObjectMeta(
+                name=service_name,
+                labels={"app": "yuxi-sandbox", "sandbox-id": sandbox_id},
+            ),
+            spec=self._client.V1ServiceSpec(
+                type="NodePort",
+                selector={"sandbox-id": sandbox_id},
+                ports=[
+                    self._client.V1ServicePort(
+                        name="http",
+                        port=self._container_port,
+                        target_port=self._container_port,
+                        protocol="TCP",
+                    )
+                ],
+            ),
+        )
+
+    def create(self, sandbox_id: str, thread_id: str) -> SandboxRecord:
+        from kubernetes.client.rest import ApiException
+
+        with self._lock:
+            discovered = self.discover(sandbox_id)
+            if discovered is not None:
+                return discovered
+
+            try:
+                self._core_api.create_namespaced_pod(
+                    namespace=self._namespace,
+                    body=self._build_pod_spec(sandbox_id, thread_id),
+                )
+            except ApiException as exc:
+                if exc.status != 409:
+                    raise
+
+            try:
+                self._core_api.create_namespaced_service(
+                    namespace=self._namespace,
+                    body=self._build_service_spec(sandbox_id),
+                )
+            except ApiException as exc:
+                if exc.status != 409:
+                    raise
+
+            health_timeout = int(os.getenv("SANDBOX_HEALTH_TIMEOUT_SECONDS", "60"))
+            record = self.discover(sandbox_id)
+            if record is None:
+                raise RuntimeError(f"failed to discover sandbox after create: {sandbox_id}")
+            if not wait_for_sandbox_ready(record.sandbox_url, timeout_seconds=health_timeout):
+                try:
+                    self.delete(sandbox_id)
+                except Exception:
+                    pass
+                raise RuntimeError(f"sandbox {sandbox_id} is not ready at {record.sandbox_url}")
+            return record
+
+    def discover(self, sandbox_id: str) -> SandboxRecord | None:
+        from kubernetes.client.rest import ApiException
+
+        pod_name = self._pod_name(sandbox_id)
+        service_name = self._service_name(sandbox_id)
+        try:
+            pod = self._core_api.read_namespaced_pod(name=pod_name, namespace=self._namespace)
+            service = self._core_api.read_namespaced_service(name=service_name, namespace=self._namespace)
+        except ApiException as exc:
+            if exc.status == 404:
+                return None
+            raise
+
+        node_port = None
+        if service.spec and service.spec.ports:
+            node_port = service.spec.ports[0].node_port
+        if not node_port:
+            sandbox_url = ""
+        else:
+            sandbox_url = f"http://{self._node_host}:{node_port}"
+
+        return SandboxRecord(
+            sandbox_id=sandbox_id,
+            sandbox_url=sandbox_url,
+            status=(pod.status.phase if pod and pod.status else "Unknown"),
+        )
+
+    def list(self) -> list[SandboxRecord]:
+        from kubernetes.client.rest import ApiException
+
+        try:
+            pod_list = self._core_api.list_namespaced_pod(
+                namespace=self._namespace,
+                label_selector="app=yuxi-sandbox",
+            )
+        except ApiException:
+            return []
+
+        records: list[SandboxRecord] = []
+        for pod in pod_list.items:
+            sandbox_id = (pod.metadata.labels or {}).get("sandbox-id")
+            if not sandbox_id:
+                continue
+            record = self.discover(sandbox_id)
+            if record is not None:
+                records.append(record)
+        return records
+
+    def delete(self, sandbox_id: str) -> None:
+        from kubernetes.client.rest import ApiException
+
+        pod_name = self._pod_name(sandbox_id)
+        service_name = self._service_name(sandbox_id)
+
+        for delete_call in (
+            lambda: self._core_api.delete_namespaced_service(name=service_name, namespace=self._namespace),
+            lambda: self._core_api.delete_namespaced_pod(name=pod_name, namespace=self._namespace),
+        ):
+            try:
+                delete_call()
+            except ApiException as exc:
+                if exc.status != 404:
+                    raise
+
+
+class SandboxIdleReaper:
+    def __init__(self, backend):
+        self._backend = backend
+        self._lock = threading.Lock()
+        self._last_activity_at: dict[str, float] = {}
+        self._stop_event = threading.Event()
+        self._thread: threading.Thread | None = None
+        self._exec_timeout_seconds = int(os.getenv("SANDBOX_EXEC_TIMEOUT_SECONDS", "180"))
+        configured_idle_timeout = int(os.getenv("SANDBOX_IDLE_TIMEOUT_SECONDS", "600"))
+        if 0 < configured_idle_timeout <= self._exec_timeout_seconds:
+            logger.warning(
+                "SANDBOX_IDLE_TIMEOUT_SECONDS=%s is <= SANDBOX_EXEC_TIMEOUT_SECONDS=%s; "
+                "adjusting idle timeout to %s seconds to avoid reaping running commands",
+                configured_idle_timeout,
+                self._exec_timeout_seconds,
+                self._exec_timeout_seconds + 30,
+            )
+            configured_idle_timeout = self._exec_timeout_seconds + 30
+        self._idle_timeout_seconds = configured_idle_timeout
+        self._check_interval_seconds = max(1, int(os.getenv("SANDBOX_IDLE_CHECK_INTERVAL_SECONDS", "10")))
+
+    def touch(self, sandbox_id: str) -> None:
+        with self._lock:
+            self._last_activity_at[sandbox_id] = time.time()
+
+    def forget(self, sandbox_id: str) -> None:
+        with self._lock:
+            self._last_activity_at.pop(sandbox_id, None)
+
+    def _seed_existing(self) -> None:
+        try:
+            records = self._backend.list()
+        except Exception as exc:  # noqa: BLE001
+            logger.warning(f"Failed to seed sandbox activity for idle reaper: {exc}")
+            return
+
+        now = time.time()
+        with self._lock:
+            for record in records:
+                self._last_activity_at.setdefault(record.sandbox_id, now)
+
+    def _collect_expired_sandbox_ids(self) -> list[str]:
+        if self._idle_timeout_seconds <= 0:
+            return []
+        cutoff = time.time() - self._idle_timeout_seconds
+        with self._lock:
+            return [sandbox_id for sandbox_id, last_at in self._last_activity_at.items() if last_at <= cutoff]
+
+    def _run(self) -> None:
+        while not self._stop_event.wait(self._check_interval_seconds):
+            expired_ids = self._collect_expired_sandbox_ids()
+            for sandbox_id in expired_ids:
+                try:
+                    self._backend.delete(sandbox_id)
+                    logger.info(f"Deleted idle sandbox: {sandbox_id}")
+                    self.forget(sandbox_id)
+                except Exception as exc:  # noqa: BLE001
+                    logger.warning(f"Failed to delete idle sandbox {sandbox_id}: {exc}")
+
+    def start(self) -> None:
+        if self._idle_timeout_seconds <= 0:
+            logger.info("Idle reaper disabled (SANDBOX_IDLE_TIMEOUT_SECONDS <= 0)")
+            return
+        self._seed_existing()
+        self._thread = threading.Thread(target=self._run, name="sandbox-idle-reaper", daemon=True)
+        self._thread.start()
+        logger.info(
+            "Started sandbox idle reaper with timeout=%ss interval=%ss",
+            self._idle_timeout_seconds,
+            self._check_interval_seconds,
+        )
+
+    def shutdown(self) -> None:
+        self._stop_event.set()
+        if self._thread is not None:
+            self._thread.join(timeout=3)
+
+
+def _build_backend():
+    backend = (os.getenv("PROVISIONER_BACKEND", "memory") or "memory").strip().lower()
+    if backend in {"docker", "local"}:
+        return LocalContainerProvisionerBackend(), backend
+    if backend == "kubernetes":
+        return KubernetesProvisionerBackend(), backend
+    return MemoryProvisionerBackend(), backend
+
+
+backend_impl, backend_name = _build_backend()
+idle_reaper = SandboxIdleReaper(backend_impl)
+
+
+@asynccontextmanager
+async def lifespan(_app: FastAPI):
+    idle_reaper.start()
+    try:
+        yield
+    finally:
+        idle_reaper.shutdown()
+
+
+app = FastAPI(title="Yuxi Sandbox Provisioner", lifespan=lifespan)
+
+
+@app.get("/health")
+def health():
+    tracked = len(idle_reaper._last_activity_at)  # noqa: SLF001
+    return {
+        "status": "ok",
+        "backend": backend_name,
+        "idle_timeout_seconds": idle_reaper._idle_timeout_seconds,  # noqa: SLF001
+        "idle_check_interval_seconds": idle_reaper._check_interval_seconds,  # noqa: SLF001
+        "tracked_sandboxes": tracked,
+    }
+
+
+@app.post("/api/sandboxes", response_model=SandboxResponse)
+def create_sandbox(payload: CreateSandboxRequest):
+    try:
+        # Backend.create() already handles container reuse (discovers existing container first)
+        record = backend_impl.create(payload.sandbox_id, payload.thread_id)
+    except ValueError as exc:
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
+    except Exception as exc:  # noqa: BLE001
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
+    idle_reaper.touch(record.sandbox_id)
+    return SandboxResponse(
+        sandbox_id=record.sandbox_id,
+        sandbox_url=record.sandbox_url,
+        status=record.status,
+    )
+
+
+@app.get("/api/sandboxes/{sandbox_id}", response_model=SandboxResponse)
+def get_sandbox(sandbox_id: str):
+    try:
+        record = backend_impl.discover(sandbox_id)
+    except Exception as exc:  # noqa: BLE001
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
+
+    if record is None:
+        raise HTTPException(status_code=404, detail="sandbox not found")
+    idle_reaper.touch(record.sandbox_id)
+
+    return SandboxResponse(
+        sandbox_id=record.sandbox_id,
+        sandbox_url=record.sandbox_url,
+        status=record.status,
+    )
+
+
+@app.post("/api/sandboxes/{sandbox_id}/touch", response_model=TouchSandboxResponse)
+def touch_sandbox(sandbox_id: str):
+    try:
+        record = backend_impl.discover(sandbox_id)
+    except Exception as exc:  # noqa: BLE001
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
+    if record is None:
+        raise HTTPException(status_code=404, detail="sandbox not found")
+    idle_reaper.touch(sandbox_id)
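+    # report the live status from discover() above rather than a cached record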
+    return TouchSandboxResponse(ok=True, sandbox_id=sandbox_id, status=record.status)
+
+
+@app.get("/api/sandboxes", response_model=ListSandboxesResponse)
+def list_sandboxes():
+    try:
+        records = backend_impl.list()
+    except Exception as exc:  # noqa: BLE001
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
+
+    sandboxes = [
+        SandboxResponse(
+            sandbox_id=record.sandbox_id,
+            sandbox_url=record.sandbox_url,
+            status=record.status,
+        )
+        for record in records
+    ]
+    return ListSandboxesResponse(sandboxes=sandboxes, count=len(sandboxes))
+
+
+@app.delete("/api/sandboxes/{sandbox_id}", response_model=DeleteSandboxResponse)
+def delete_sandbox(sandbox_id: str):
+    try:
+        backend_impl.delete(sandbox_id)
+    except Exception as exc:  # noqa: BLE001
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
+    idle_reaper.forget(sandbox_id)
+
+    return DeleteSandboxResponse(ok=True, sandbox_id=sandbox_id)
diff --git a/docker/sandbox_provisioner/requirements.txt b/docker/sandbox_provisioner/requirements.txt
new file mode 100644
index 000000000..7feabc1fc
--- /dev/null
+++ b/docker/sandbox_provisioner/requirements.txt
@@ -0,0 +1,4 @@
+fastapi>=0.121
+uvicorn[standard]>=0.34.2
+kubernetes>=31.0.0
+docker>=7.1.0
diff --git a/docs/.vitepress/config.mts b/docs/.vitepress/config.mts
index 164b7ff68..78d60817b 100644
--- a/docs/.vitepress/config.mts
+++ b/docs/.vitepress/config.mts
@@ -60,7 +60,8 @@ export default defineConfig({
           { text: '文档解析', link: '/latest/advanced/document-processing' },
           { text: '品牌自定义', link: '/latest/advanced/branding' },
           { text: '其他配置', link: '/latest/advanced/misc' },
-          { text: '生产部署', link: '/latest/advanced/deployment' }
+          { text: '生产部署', link: '/latest/advanced/deployment' },
+          { text: 'Kubernetes 部署', link: '/latest/advanced/kubernetes-deployment' }
         ]
       },
       {
diff --git a/pyproject.toml b/pyproject.toml
index 687759592..75ddfb064 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -63,12 +63,16 @@ dependencies = [
     "arq>=0.26.3",
     "chardet>=5.0.0",
     "deepagents>=0.2.5",
+    "agent-sandbox>=0.0.26",
     "json-repair>=0.54.0",
     "torch>=2.8.0",
     "torchvision==0.23",
     "docling>=2.68.0",
     "loguru>=0.7.3",
     "redis>=5.2.0",
+    "aioboto3>=13.0.0",
+    "wcmatch>=8.0.0",
+    "psycopg[binary,pool]>=3.3.3",
 ]

 [tool.ruff]
 line-length = 120  # 代码最大行宽
diff --git a/server/main.py b/server/main.py
index 2534dcb79..6b07caf28 100644
--- a/server/main.py
+++ b/server/main.py
@@ -1,4 +1,16 @@
 import asyncio
+import os
+import sys
+
+# ==============================================================================
+# 解决 Windows 下 psycopg 异步模式不支持 ProactorEventLoop 的问题
+# 注意:这段代码必须放在应用的极早期,最好在导入 FastAPI 或初始化数据库之前
+# ==============================================================================
+if sys.platform == "win32":
+    # 把当前文件 (main.py) 的上一级的上一级(即根目录 Yuxi-Know)加入到 sys.path
+    sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
+
 import time
 from collections import defaultdict, deque

@@ -124,4 +136,12 @@ async def dispatch(self, request: Request, call_next):
 app.add_middleware(AuthMiddleware)

 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=5050, threads=10, workers=10, reload=True)
+    uvicorn.run(
+        "server.main:app",
+        host="0.0.0.0",
+        port=5050,
+        reload=True,
+        reload_dirs=["server", "src"],
+    )
diff --git a/server/routers/chat_router.py b/server/routers/chat_router.py
index c5231c308..82edc0c09 100644
--- a/server/routers/chat_router.py
+++ b/server/routers/chat_router.py
@@ -1,9 +1,10 @@
 import traceback
 import uuid
 from typing import Any
+from mimetypes import guess_type

 from fastapi import APIRouter, Body, Depends, HTTPException, Query, UploadFile, File
-from fastapi.responses import StreamingResponse
+from fastapi.responses import FileResponse, StreamingResponse
 from pydantic import BaseModel, Field
 from sqlalchemy.ext.asyncio import AsyncSession

@@ -30,6 +31,11 @@
     update_thread_view,
     upload_thread_attachment_view,
 )
+from src.services.thread_files_service import (
+    list_thread_files_view,
+    read_thread_file_content_view,
+    resolve_thread_artifact_view,
+)
 from src.services.feedback_service import get_message_feedback_view, submit_message_feedback_view
 from src.services.history_query_service import get_agent_history_view
 from src.repositories.agent_config_repository import AgentConfigRepository
@@ -734,7 +740,9 @@ class AttachmentResponse(BaseModel):
     file_size: int
     status: str
     uploaded_at: str
-    truncated: bool | None = False
+    path: str
+    artifact_url: str | None = None
+    minio_url: str | None = None


 class AttachmentLimits(BaseModel):
@@ -747,6 +755,29 @@ class AttachmentListResponse(BaseModel):
     limits: AttachmentLimits


+class ThreadFileEntry(BaseModel):
+    path: str
+    name: str
+    is_dir: bool
+    size: int
+    modified_at: str | None = None
+    artifact_url: str | None = None
+
+
+class ThreadFileListResponse(BaseModel):
+    path: str
+    files: list[ThreadFileEntry]
+
+
+class ThreadFileContentResponse(BaseModel):
+    path: str
+    content: list[str]
+    offset: int
+    limit: int
+    total_lines: int
+    artifact_url: str
+
+
 # =============================================================================
 # > === 会话管理分组 ===
 # =============================================================================
@@ -822,7 +853,7 @@ async def upload_thread_attachment(
     db: AsyncSession = Depends(get_db),
     current_user: User = Depends(get_required_user),
 ):
-    """上传并解析附件为 Markdown,附加到指定对话线程。"""
+    """上传原始附件并关联到指定对话线程。"""
     return await upload_thread_attachment_view(
         thread_id=thread_id,
         file=file,
@@ -861,6 +892,65 @@ async def delete_thread_attachment(
     )


+@chat.get("/thread/{thread_id}/files", response_model=ThreadFileListResponse)
+async def list_thread_files(
+    thread_id: str,
+    path: str = Query("/mnt/user-data"),
+    recursive: bool = Query(False),
+    db: AsyncSession = Depends(get_db),
+    current_user: User = Depends(get_required_user),
+):
+    """列出线程文件目录。"""
+    return await list_thread_files_view(
+        thread_id=thread_id,
+        current_user_id=str(current_user.id),
+        db=db,
+        path=path,
+        recursive=recursive,
+    )
+
+
+@chat.get("/thread/{thread_id}/files/content", response_model=ThreadFileContentResponse)
+async def read_thread_file_content(
+    thread_id: str,
+    path: str = Query(...),
+    offset: int = Query(0, ge=0),
+    limit: int = Query(2000, ge=1, le=5000),
+    db: AsyncSession = Depends(get_db),
+    current_user: User = Depends(get_required_user),
+):
+    """读取线程文本文件(按行分页)。"""
+    return await read_thread_file_content_view(
+        thread_id=thread_id,
+        current_user_id=str(current_user.id),
+        db=db,
+        path=path,
+        offset=offset,
+        limit=limit,
+    )
+
+
+@chat.get("/thread/{thread_id}/artifacts/{path:path}")
+async def get_thread_artifact(
+    thread_id: str,
+    path: str,
+    download: bool = Query(False),
+    db: AsyncSession = Depends(get_db),
+    current_user: User = Depends(get_required_user),
+):
+    """下载或预览线程文件。"""
+    file_path = await resolve_thread_artifact_view(
+        thread_id=thread_id,
+        current_user_id=str(current_user.id),
+        db=db,
+        path=path,
+    )
+
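+    # 根据文件名推断 MIME 类型,未知类型回退为二进制流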
+    media_type = guess_type(file_path.name)[0] or "application/octet-stream"
+    headers = {"Content-Disposition": f'attachment; filename="{file_path.name}"'} if download else None
+    return FileResponse(path=file_path, media_type=media_type, headers=headers)
+
+
 # =============================================================================
 # > === 消息反馈分组 ===
 # =============================================================================
diff --git a/server/utils/lifespan.py b/server/utils/lifespan.py
index 42ff8e1d0..730bd4cec 100644
--- a/server/utils/lifespan.py
+++ b/server/utils/lifespan.py
@@ -1,12 +1,14 @@
 from contextlib import asynccontextmanager

 from fastapi import FastAPI
+from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver

-from src.services.task_service import tasker
 from src.services.mcp_service import init_mcp_servers
 from src.services.run_queue_service import close_queue_clients, get_redis_client
+from src.services.task_service import tasker
 from src.storage.postgres.manager import pg_manager
 from src.knowledge import knowledge_base
+from src.sandbox import init_sandbox_provider, shutdown_sandbox_provider
 from src.utils import logger

@@ -41,8 +43,21 @@ async def lifespan(app: FastAPI):
     except Exception as e:
         logger.warning(f"Run queue redis unavailable on startup: {e}")

+    try:
+        init_sandbox_provider()
+    except Exception as e:
+        logger.error(f"Failed to initialize sandbox provider during startup: {e}")
+
+    # 初始化 LangGraph checkpoint 表:启动时执行一次 setup() 即可,建表后无需重复执行
+    checkpointer = AsyncPostgresSaver(pg_manager.langgraph_pool)
+    await checkpointer.setup()
+    logger.info("LangGraph checkpoint tables verified/created")
+
     await tasker.start()
     yield
     await tasker.shutdown()
+    shutdown_sandbox_provider()
     await close_queue_clients()
     await pg_manager.close()
diff --git a/server/worker_main.py b/server/worker_main.py
index a9ede96aa..983df2b39 100644
--- a/server/worker_main.py
+++ b/server/worker_main.py
@@ -1,5 +1,15 @@
 """ARQ worker entrypoint."""

+import asyncio
+import os
+import sys
+
+# 必须放在最顶层!
+if sys.platform == "win32":
+    # 把当前文件 (worker_main.py) 的上一级的上一级(即根目录 Yuxi-Know)加入到 sys.path
+    sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
+
 from src.services.run_worker import WorkerSettings

 __all__ = ["WorkerSettings"]
diff --git a/src/agents/common/backends/composite.py b/src/agents/common/backends/composite.py
index d52402617..370dc0332 100644
--- a/src/agents/common/backends/composite.py
+++ b/src/agents/common/backends/composite.py
@@ -1,10 +1,71 @@
-from deepagents.backends import CompositeBackend, StateBackend
+from __future__ import annotations

-from src.agents.common.middlewares.skills_middleware import normalize_selected_skills
+from deepagents.backends.composite import (
+    CompositeBackend,
+    _route_for_path,
+    _remap_file_info_path,
+    _strip_route_from_pattern,
+)
+from deepagents.backends.protocol import FileInfo

+from src.agents.common.middlewares.skills_middleware import normalize_selected_skills
+from src.sandbox import ProvisionerSandboxBackend
 from .skills_backend import SelectedSkillsReadonlyBackend


+class CustomCompositeBackend(CompositeBackend):
+    """修复 glob_info 路由逻辑的 CompositeBackend。
+
+    修复内容:当 path 不匹配任何路由时应该只搜索 default 后端,
+    而不是错误地遍历所有路由后端搜索。
+    """
+
+    def glob_info(self, pattern: str, path: str = "/") -> list[FileInfo]:
+        backend, backend_path, route_prefix = _route_for_path(
+            default=self.default,
+            sorted_routes=self.sorted_routes,
+            path=path,
+        )
+        if route_prefix is not None:
+            infos = backend.glob_info(pattern, backend_path)
+            return [_remap_file_info_path(fi, route_prefix) for fi in infos]
+
+        # 只在 path 为 None 或 "/" 时搜索所有后端,其他只搜索 default
+        if path is None or path == "/":
+            results: list[FileInfo] = []
+            results.extend(self.default.glob_info(pattern, path))
+            for route_prefix, backend in self.routes.items():
+                route_pattern = _strip_route_from_pattern(pattern, route_prefix)
+                infos = backend.glob_info(route_pattern, "/")
+                results.extend(_remap_file_info_path(fi, route_prefix) for fi in infos)
+            results.sort(key=lambda x: x.get("path", ""))
+            return results
+
+        return self.default.glob_info(pattern, path)
+
+    async def aglob_info(self, pattern: str, path: str = "/") -> list[FileInfo]:
+        backend, backend_path, route_prefix = _route_for_path(
+            default=self.default,
+            sorted_routes=self.sorted_routes,
+            path=path,
+        )
+        if route_prefix is not None:
+            infos = await backend.aglob_info(pattern, backend_path)
+            return [_remap_file_info_path(fi, route_prefix) for fi in infos]
+
+        if path is None or path == "/":
+            results: list[FileInfo] = []
+            results.extend(await self.default.aglob_info(pattern, path))
+            for route_prefix, backend in self.routes.items():
+                route_pattern = _strip_route_from_pattern(pattern, route_prefix)
+                infos = await backend.aglob_info(route_pattern, "/")
+                results.extend(_remap_file_info_path(fi, route_prefix) for fi in infos)
+            results.sort(key=lambda x: x.get("path", ""))
+            return results
+
+        return await self.default.aglob_info(pattern, path)
+
+
 def _get_visible_skills_from_runtime(runtime) -> list[str]:
     """获取运行时可见的 skills 列表"""
     context = getattr(runtime, "context", None)
@@ -12,11 +73,28 @@ def _get_visible_skills_from_runtime(runtime) -> list[str]:
     return normalize_selected_skills(selected)


+def _extract_thread_id(runtime) -> str:
+    config = getattr(runtime, "config", None)
+    if isinstance(config, dict):
+        configurable = config.get("configurable", {})
+        if isinstance(configurable, dict):
+            thread_id = configurable.get("thread_id")
+            if isinstance(thread_id, str) and thread_id.strip():
+                return thread_id.strip()
+
+    context = getattr(runtime, "context", None)
+    thread_id = getattr(context, "thread_id", None)
+    if isinstance(thread_id, str) and thread_id.strip():
+        return thread_id.strip()
+
+    raise ValueError("thread_id is required in runtime configurable context")
+
+
 def create_agent_composite_backend(runtime) -> CompositeBackend:
-    """为 agent 构建 backend:默认 StateBackend + /skills 路由只读 backend。"""
+    """为 agent 构建 backend:默认 ProvisionerSandboxBackend + /skills 路由只读 backend。"""
     visible_skills = _get_visible_skills_from_runtime(runtime)
-    return CompositeBackend(
-        default=StateBackend(runtime),
+    thread_id = _extract_thread_id(runtime)
+    return CustomCompositeBackend(
+        default=ProvisionerSandboxBackend(thread_id=thread_id),
         routes={
             "/skills/": SelectedSkillsReadonlyBackend(selected_slugs=visible_skills),
         },
diff --git a/src/agents/common/base.py b/src/agents/common/base.py
index 4de16af36..16a631373 100644
--- a/src/agents/common/base.py
+++ b/src/agents/common/base.py
@@ -1,11 +1,9 @@
 from __future__ import annotations

-import asyncio
 import importlib.util
 import os
 import tomllib as tomli
 from abc import abstractmethod
-from inspect import isawaitable
 from pathlib import Path

 from langgraph.checkpoint.memory import InMemorySaver
@@ -14,6 +12,7 @@

 from src import config as sys_config
 from src.agents.common.context import BaseContext
+from src.storage.postgres.manager import pg_manager
 from src.utils import logger

@@ -30,7 +29,6 @@ class BaseAgent:
     def __init__(self, **kwargs):
         self.graph = None  # will be covered by get_graph
         self.checkpointer = None
-        self._checkpointer_cm = None
         self._async_conn = None
         self.workdir = Path(sys_config.save_dir) / "agents" / self.module_name
         self.workdir.mkdir(parents=True, exist_ok=True)
@@ -159,15 +157,6 @@ async def get_history(self, user_id, thread_id) -> list[dict]:
     def reload_graph(self):
         """重置 graph 缓存,强制下次调用 get_graph 时重新构建"""
         self.graph = None
-        self.checkpointer = None
-        if self._checkpointer_cm is not None:
-            try:
-                loop = asyncio.get_running_loop()
-            except RuntimeError:
-                loop = None
-
-            if loop is not None:
-                loop.create_task(self._close_checkpointer_context())
         logger.info(f"{self.name} graph 缓存已清空,将在下次调用时重新构建")

     @abstractmethod
@@ -211,40 +200,15 @@ async def _create_postgres_checkpointer(self):
             logger.warning(f"langgraph postgres checkpointer 不可用,回退 sqlite: {e}")
             return None

-        conn_str = postgres_url.replace("+asyncpg", "")
         try:
-            saver_factory = getattr(AsyncPostgresSaver, "from_conn_string", None)
-            if callable(saver_factory):
-                saver = saver_factory(conn_str)
-            else:
-                saver = AsyncPostgresSaver(conn_str)  # type: ignore[call-arg]
-
-            if hasattr(saver, "__aenter__") and hasattr(saver, "__aexit__"):
-                self._checkpointer_cm = saver
-                saver = await saver.__aenter__()
+            saver = AsyncPostgresSaver(pg_manager.langgraph_pool)

-            setup_fn = getattr(saver, "setup", None)
-            if callable(setup_fn):
-                result = setup_fn()
-                if isawaitable(result):
-                    await result
             logger.info(f"{self.name} 使用 postgres checkpointer")
             return saver
         except Exception as e:
             logger.warning(f"初始化 postgres checkpointer 失败,回退 sqlite: {e}")
             return None

-    async def _close_checkpointer_context(self):
-        if self._checkpointer_cm is None:
-            return
-
-        cm = self._checkpointer_cm
-        self._checkpointer_cm = None
-        try:
-            await cm.__aexit__(None, None, None)
-        except Exception as e:
-            logger.warning(f"关闭 postgres checkpointer 失败: {e}")
-
     async def get_async_conn(self) -> aiosqlite.Connection:
         """获取异步数据库连接"""
         if self._async_conn is not None:
diff --git a/src/agents/common/middlewares/attachment_middleware.py b/src/agents/common/middlewares/attachment_middleware.py
index 0e4235085..cd6bef557 100644
--- a/src/agents/common/middlewares/attachment_middleware.py
+++ b/src/agents/common/middlewares/attachment_middleware.py
@@ -1,6 +1,7 @@
-"""附件注入中间件 - 使用 LangChain 标准中间件实现
+"""Attachment prompt injection middleware.

-从 State 中读取附件信息,注入提示词让模型使用 read_file 工具读取附件内容。
+Read uploaded file metadata from LangGraph state and inject readable paths
+into the system prompt so the model can use `read_file` on demand.
 """

 from __future__ import annotations
@@ -18,67 +19,51 @@

 class AttachmentState(AgentState):
-    """扩展 AgentState 以支持附件"""
+    """Extended state schema with uploaded files."""

-    attachments: NotRequired[list[dict]]
+    uploads: NotRequired[list[dict]]


-def _build_attachment_prompt(attachments: Sequence[dict]) -> str | None:
-    """Render attachments into a system prompt block with file paths.
-
-    提示模型使用 read_file 工具读取附件内容。
-    """
-    if not attachments:
+def _build_attachment_prompt(uploads: Sequence[dict]) -> str | None:
+    """Render uploads into a concise prompt block."""
+    if not uploads:
         return None

-    valid_attachments = [a for a in attachments if a.get("status") == "parsed"]
+    valid_uploads: list[tuple[str, str]] = []
+    for upload in uploads:
+        path = upload.get("path")
+        if not isinstance(path, str) or not path.strip():
+            continue
+        file_name = upload.get("file_name", "未知文件")
+        valid_uploads.append((str(file_name), path))

-    if not valid_attachments:
+    if not valid_uploads:
         return None

-    attachment_infos: list[str] = []
-    for attachment in valid_attachments:
-        file_name = attachment.get("file_name", "未知文件")
-        file_path = attachment.get("file_path", "")
-        truncated = "(已截断)" if attachment.get("truncated") else ""
-
-        if file_path:
-            attachment_infos.append(f"- {file_name}{truncated}: {file_path}")
-        else:
-            attachment_infos.append(f"- {file_name}{truncated}")
-
+    upload_infos = [f"- {file_name}: {path}" for file_name, path in valid_uploads]
     lines = [
-        "用户上传了以下附件:",
+        "用户上传了以下文件:",
         "",
-        *attachment_infos,
+        *upload_infos,
         "",
-        "请使用 read_file 工具读取附件内容后,再回答用户的问题。",
+        "请优先使用 `read_file` 工具读取这些路径中的文件内容,再回答用户问题。",
     ]
-
     return "\n".join(lines)


 class AttachmentMiddleware(AgentMiddleware[AttachmentState]):
-    """
-    LangChain 标准中间件:从 State 中读取附件并注入提示词。
-
-    LangGraph 会自动从 checkpointer 恢复 state,包括 attachments。
-    从 request.state 中读取附件,将其转换为上下文块并注入到系统提示词中。
-    """
+    """Inject upload context from state.uploads into system prompt."""

     state_schema = AttachmentState

     async def awrap_model_call(
         self, request: ModelRequest, handler: Callable[[ModelRequest], ModelResponse]
     ) -> ModelResponse:
-        # 从 state 获取附件(LangGraph 自动从 checkpointer 恢复)
-        attachments = request.state.get("attachments", [])
-        logger.info(f"AttachmentMiddleware: found {len(attachments)} attachments in state")
-
-        if attachments:
-            # 构建附件提示
-            attachment_prompt = _build_attachment_prompt(attachments)
+        uploads = request.state.get("uploads", [])
+        logger.info(f"AttachmentMiddleware: found {len(uploads)} uploads in state")
+        if uploads:
+            attachment_prompt = _build_attachment_prompt(uploads)
             if attachment_prompt:
                 logger.info("AttachmentMiddleware: injecting attachment prompt")
                 existing_blocks = list(request.system_message.content_blocks) if request.system_message else []
@@ -100,8 +85,5 @@ async def awrap_model_call(

         return await handler(request)


-# 创建中间件实例,供其他模块使用
 save_attachments_to_fs = AttachmentMiddleware()
-
-# 保留旧名称以保持向后兼容(已废弃)
 inject_attachment_context = save_attachments_to_fs
diff --git a/src/agents/reporter/graph.py b/src/agents/reporter/graph.py
index 5d1731b78..df4ee0d36 100644
--- a/src/agents/reporter/graph.py
+++ b/src/agents/reporter/graph.py
@@ -1,90 +1,61 @@
 from dataclasses import dataclass, field
 from typing import Annotated

-from deepagents.backends import StateBackend
 from deepagents.middleware.filesystem import FilesystemMiddleware
 from langchain.agents import create_agent

 from src.agents.common import BaseAgent, BaseContext, load_chat_model
-from src.agents.common.middlewares import (
-    RuntimeConfigMiddleware,
-    save_attachments_to_fs,
-)
+from src.agents.common.backends import create_agent_composite_backend
+from src.agents.common.middlewares import RuntimeConfigMiddleware, save_attachments_to_fs
 from src.agents.common.toolkits.mysql import get_mysql_tools
 from src.services.mcp_service import get_mcp_server_names, get_tools_from_all_servers
 from src.utils import logger


 def _create_fs_backend(rt):
-    """创建文件存储后端"""
-    return StateBackend(rt)
+    return create_agent_composite_backend(rt)


-PROMPT = """你的任务是根据用户的指令,使用数据库工具和图表绘制工具,构建 SQL 查询报告。
-你需要根据用户的指令,生成相应的 SQL 查询,并将查询结果以报表的形式返回给用户。
-在生成报表时,你可以调用工具生成图表,以更直观地展示数据。
-务必确保生成的 SQL 查询是正确且高效的,以避免对数据库造成不必要的负担。
-在生成报表时,请遵循以下原则:
-1. 理解用户的指令,明确报表的需求和目标。
-2. 图表生成工具的返回结果不会默认渲染,需要在最终的报表中以图片形式(markdown格式)嵌入。
-3. 必要时,使用网络检索相关工具补充信息。
+PROMPT = """你的任务是根据用户指令,使用数据库工具和图表工具生成 SQL 报告。
+请先理解需求,再给出准确且高效的 SQL 查询;必要时调用图表工具并在最终回答中以 Markdown 图片形式展示结果。
 """


 @dataclass(kw_only=True)
 class ReporterContext(BaseContext):
-    """覆盖 BaseContext,定义数据库报表助手智能体的可配置参数"""
-
-    # 覆盖 system_prompt,提供更具体的默认值
     system_prompt: Annotated[str, {"__template_metadata__": {"kind": "prompt"}}] = field(
         default=PROMPT,
-        metadata={"name": "系统提示词", "description": "用来描述智能体的角色和行为"},
+        metadata={"name": "系统提示词", "description": "描述 SQL 报告助手的行为"},
     )
-
     mcps: Annotated[list[str], {"__template_metadata__": {"kind": "mcps"}}] = field(
         default_factory=lambda: ["mcp-server-chart"],
         metadata={
-            "name": "MCP服务器",
+            "name": "MCP 服务",
             "options": lambda: get_mcp_server_names(),
-            "description": (
-                "MCP服务器列表,建议使用支持 SSE 的 MCP 服务器,"
-                "如果需要使用 uvx 或 npx 运行的服务器,也请在项目外部启动 MCP 服务器,并在项目中配置 MCP 服务器。"
-            ),
+            "description": "报告场景默认启用图表 MCP。",
         },
     )


 class SqlReporterAgent(BaseAgent):
-    name = "数据库报表助手"
-    description = (
-        "一个能够生成 SQL 查询报告的智能体助手。同时调用 Charts MCP 生成图表。"
-        "MySQL 工具默认启用,无法选择,mcp 默认启用 Charts MCPs。"
-    )
+    name = "数据报表助手"
+    description = "根据用户需求生成 SQL 查询并输出图表化报告。"
     context_schema = ReporterContext
-    capabilities = [
-        "file_upload",
-        "files",
-    ]
-
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
+    capabilities = ["file_upload", "files"]

     async def get_graph(self, **kwargs):
-        """构建图"""
         context = self.context_schema.from_file(module_name=self.module_name)
         all_mcp_tools = await get_tools_from_all_servers()
-
         graph = create_agent(
             model=load_chat_model(context.model),
             system_prompt=context.system_prompt,
-            tools=get_mysql_tools(),  # MySQL 工具默认启用,这里添加的 tools,不会在工具选择框中出现
+            tools=get_mysql_tools(),
             middleware=[
-                FilesystemMiddleware(backend=_create_fs_backend),  # 文件系统后端
+                FilesystemMiddleware(backend=_create_fs_backend),
                 RuntimeConfigMiddleware(extra_tools=all_mcp_tools),
-                save_attachments_to_fs,  # 附件保存到文件系统
+                save_attachments_to_fs,
             ],
             checkpointer=await self._get_checkpointer(),
         )
-
-        logger.info("SqlReporterAgent 构建成功")
+        logger.info("SqlReporterAgent graph created")
         return graph
diff --git a/src/config/app.py b/src/config/app.py
index 98543fe53..4534e6628 100644
--- a/src/config/app.py
+++ b/src/config/app.py
@@ -7,13 +7,15 @@
 - 默认配置定义在代码中
 """

+from __future__ import annotations
annotations + import os from pathlib import Path from typing import Any import tomli import tomli_w -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, PrivateAttr from src.config.static.models import ( DEFAULT_CHAT_MODEL_PROVIDERS, @@ -72,6 +74,16 @@ class Config(BaseModel): # ============================================================ default_agent_id: str = Field(default="ChatbotAgent", description="默认智能体ID") + # ============================================================ + # Sandbox 配置 + # ============================================================ + sandbox_provider: str = Field(default="provisioner", description="沙箱提供者") + sandbox_provisioner_url: str = Field(default="http://sandbox-provisioner:8002", description="沙箱服务地址") + sandbox_virtual_path_prefix: str = Field(default="/mnt/user-data", description="沙箱虚拟路径前缀") + sandbox_exec_timeout_seconds: int = Field(default=180, description="沙箱执行超时时间(秒)") + sandbox_max_output_bytes: int = Field(default=262144, description="沙箱最大输出字节数") + sandbox_keepalive_interval_seconds: int = Field(default=30, description="沙箱保活间隔(秒)") + # ============================================================ # 模型信息(只读,不持久化) # ============================================================ @@ -106,9 +118,9 @@ class Config(BaseModel): ) # 内部状态 - _config_file: Path | None = None - _user_modified_fields: set[str] = set() - _modified_providers: set[str] = set() # 记录具体修改的模型提供商 + _config_file: Path | None = PrivateAttr(default=None) + _user_modified_fields: set[str] = PrivateAttr(default_factory=set) + _modified_providers: set[str] = PrivateAttr(default_factory=set) # 记录具体修改的模型提供商 model_config = {"arbitrary_types_allowed": True, "extra": "allow"} @@ -119,13 +131,13 @@ def __init__(self, **data): self._load_custom_providers() self._handle_environment() - def _setup_paths(self): + def _setup_paths(self) -> None: """设置配置文件路径""" self.save_dir = os.getenv("SAVE_DIR") or self.save_dir self._config_file = Path(self.save_dir) / "config" / "base.toml" self._config_file.parent.mkdir(parents=True, exist_ok=True) - def _load_user_config(self): + def _load_user_config(self) -> None: """从 TOML 文件加载用户配置""" if not self._config_file or not self._config_file.exists(): logger.info(f"Config file not found, using defaults: {self._config_file}") @@ -157,22 +169,21 @@ def _load_user_config(self): except Exception as e: logger.error(f"Failed to load config from {self._config_file}: {e}") - def _load_model_names(self, model_names_data): + def _load_model_names(self, model_names_data: dict[str, Any]) -> None: """加载用户自定义的模型配置""" - try: - for provider, provider_data in model_names_data.items(): + for provider, provider_data in (model_names_data or {}).items(): + try: if provider in self.model_names: - # 更新现有提供商的模型列表 - if "models" in provider_data: - self.model_names[provider].models = provider_data["models"] + # 合并现有提供商的配置 + merged = self.model_names[provider].model_dump() | dict(provider_data or {}) + self.model_names[provider] = ChatModelProvider(**merged) else: # 添加新的提供商 self.model_names[provider] = ChatModelProvider(**provider_data) - logger.info(f"Loaded custom model configurations for {len(model_names_data)} providers") - except Exception as e: - logger.error(f"Failed to load model names: {e}") + except Exception as e: # noqa: BLE001 + logger.warning(f"Skip invalid model provider config {provider}: {e}") - def _load_custom_providers(self): + def _load_custom_providers(self) -> None: """从独立的TOML文件加载自定义供应商配置""" custom_config_file = self._config_file.parent / 
"custom_providers.toml" @@ -192,17 +203,17 @@ def _load_custom_providers(self): except Exception as e: logger.error(f"Failed to load custom providers from {custom_config_file}: {e}") - def _load_custom_model_providers(self, providers_data): + def _load_custom_model_providers(self, providers_data: dict[str, Any]) -> None: """加载自定义模型供应商""" - try: - for provider, provider_data in providers_data.items(): - provider_data["custom"] = True - self.model_names[provider] = ChatModelProvider(**provider_data) - logger.info(f"Loaded {len(providers_data)} custom model providers") - except Exception as e: - logger.error(f"Failed to load custom model providers: {e}") - - def _handle_environment(self): + for provider, provider_data in (providers_data or {}).items(): + try: + payload = dict(provider_data or {}) + payload["custom"] = True + self.model_names[provider] = ChatModelProvider(**payload) + except Exception as e: # noqa: BLE001 + logger.warning(f"Skip invalid custom provider {provider}: {e}") + + def _handle_environment(self) -> None: """处理环境变量和运行时状态""" # 处理模型目录 self.model_dir = os.environ.get("MODEL_DIR") or self.model_dir @@ -233,10 +244,36 @@ def _handle_environment(self): # 获取可用的模型提供商 self.valuable_model_provider = [k for k, v in self.model_provider_status.items() if v] + # 处理 Sandbox 配置 + self.sandbox_provider = (os.getenv("SANDBOX_PROVIDER") or self.sandbox_provider or "provisioner").strip() + self.sandbox_provisioner_url = ( + os.getenv("SANDBOX_PROVISIONER_URL") or self.sandbox_provisioner_url or "http://sandbox-provisioner:8002" + ).strip() + self.sandbox_virtual_path_prefix = ( + os.getenv("SANDBOX_VIRTUAL_PATH_PREFIX") or self.sandbox_virtual_path_prefix or "/mnt/user-data" + ).strip() + self.sandbox_exec_timeout_seconds = int( + os.getenv("SANDBOX_EXEC_TIMEOUT_SECONDS") or self.sandbox_exec_timeout_seconds or 180 + ) + self.sandbox_max_output_bytes = int( + os.getenv("SANDBOX_MAX_OUTPUT_BYTES") or self.sandbox_max_output_bytes or 262144 + ) + self.sandbox_keepalive_interval_seconds = int( + os.getenv("SANDBOX_KEEPALIVE_INTERVAL_SECONDS") or self.sandbox_keepalive_interval_seconds or 30 + ) + + # 验证 Sandbox 配置 + if self.sandbox_provider.lower() != "provisioner": + raise ValueError("Only sandbox_provider=provisioner is supported.") + if not self.sandbox_provisioner_url: + raise ValueError("SANDBOX_PROVISIONER_URL is required when sandbox provider is provisioner.") + if not self.sandbox_virtual_path_prefix.startswith("/"): + self.sandbox_virtual_path_prefix = f"/{self.sandbox_virtual_path_prefix}" + if not self.valuable_model_provider: raise ValueError("No model provider available, please check your `.env` file.") - def save(self): + def save(self) -> None: """保存配置到 TOML 文件(仅保存用户修改的字段)""" if not self._config_file: logger.warning("Config file path not set") @@ -339,7 +376,7 @@ def __setitem__(self, key: str, value: Any): logger.warning("Using deprecated dict-style assignment for Config. 
Please use attribute access instead.") setattr(self, key, value) - def update(self, other: dict): + def update(self, other: dict[str, Any]) -> None: """批量更新配置(兼容旧代码)""" for key, value in other.items(): if hasattr(self, key): @@ -347,7 +384,7 @@ def update(self, other: dict): else: logger.warning(f"Unknown config key: {key}") - def _save_models_to_file(self, provider_name: str = None): + def _save_models_to_file(self, provider_name: str | None = None) -> None: """保存模型配置到主配置文件 Args: @@ -397,7 +434,7 @@ def _save_models_to_file(self, provider_name: str = None): # 自定义供应商管理方法 # ============================================================ - def add_custom_provider(self, provider_id: str, provider_data: dict) -> bool: + def add_custom_provider(self, provider_id: str, provider_data: dict[str, Any]) -> bool: """添加自定义供应商 Args: @@ -526,7 +563,7 @@ def get_custom_providers(self) -> dict[str, ChatModelProvider]: """ return {k: v for k, v in self.model_names.items() if v.custom} - def _save_custom_providers(self): + def _save_custom_providers(self) -> None: """保存自定义供应商到独立配置文件""" if not self._config_file: logger.warning("Config file path not set") diff --git a/src/knowledge/manager.py b/src/knowledge/manager.py index 25e4a40ad..f266b1654 100644 --- a/src/knowledge/manager.py +++ b/src/knowledge/manager.py @@ -268,16 +268,21 @@ async def get_databases_by_user_id(self, user_id: str) -> dict: return {"databases": []} return await self.get_databases_by_user(user) - async def get_databases_by_user(self, user: User) -> dict: + async def get_databases_by_user(self, user: User | dict) -> dict: """根据用户权限获取知识库列表""" - # 构建用户信息字典 - user_info = { - "role": user.role, - "department_id": user.department_id, - } + # 构建用户信息字典(支持 User 对象或 dict) + if isinstance(user, dict): + user_info = user + else: + user_info = { + "role": user.role, + "department_id": user.department_id, + } - logger.info(f"Getting databases for user {user.id} with role {user.role} and department {user.department_id}") + user_role = user_info.get("role") + user_dept = user_info.get("department_id") + logger.info(f"Getting databases for user with role {user_role} and department {user_dept}") all_databases = (await self.get_databases()).get("databases", []) diff --git a/src/repositories/conversation_repository.py b/src/repositories/conversation_repository.py index ae8d3f0f1..ee49e64c2 100644 --- a/src/repositories/conversation_repository.py +++ b/src/repositories/conversation_repository.py @@ -151,6 +151,15 @@ async def add_tool_call( error_message: str | None = None, langgraph_tool_call_id: str | None = None, ) -> ToolCall: + if langgraph_tool_call_id: + existing = await self.get_tool_call_by_langgraph_id(langgraph_tool_call_id) + if existing: + logger.debug( + "Tool call already exists for langgraph_tool_call_id=%s, skip insert", + langgraph_tool_call_id, + ) + return existing + tool_call = ToolCall( message_id=message_id, tool_name=tool_name, @@ -333,7 +342,10 @@ async def update_stats( async def get_tool_call_by_langgraph_id(self, langgraph_tool_call_id: str) -> ToolCall | None: result = await self.db.execute( - select(ToolCall).where(ToolCall.langgraph_tool_call_id == langgraph_tool_call_id) + select(ToolCall) + .where(ToolCall.langgraph_tool_call_id == langgraph_tool_call_id) + .order_by(ToolCall.created_at.desc()) + .limit(1) ) return result.scalar_one_or_none() diff --git a/src/sandbox/__init__.py b/src/sandbox/__init__.py new file mode 100644 index 000000000..f92274ad9 --- /dev/null +++ b/src/sandbox/__init__.py @@ -0,0 +1,35 @@ +from .backend 
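The `add_tool_call` change above is a check-then-insert idempotency guard keyed on `langgraph_tool_call_id`, and the reworked lookup tolerates historical duplicates by taking the newest row. A condensed sketch of the combined pattern, assuming the diff's `ToolCall` model and an async SQLAlchemy session (helper name illustrative):

from sqlalchemy import select

async def add_tool_call_idempotent(db, message_id, tool_name, langgraph_tool_call_id=None):
    if langgraph_tool_call_id:
        result = await db.execute(
            select(ToolCall)
            .where(ToolCall.langgraph_tool_call_id == langgraph_tool_call_id)
            .order_by(ToolCall.created_at.desc())  # newest wins if duplicates exist
            .limit(1)
        )
        existing = result.scalar_one_or_none()
        if existing:
            return existing  # replayed LangGraph tool call: reuse the row
    tool_call = ToolCall(message_id=message_id, tool_name=tool_name,
                         langgraph_tool_call_id=langgraph_tool_call_id)
    db.add(tool_call)
    await db.flush()
    return tool_call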
import ProvisionerSandboxBackend +from .paths import ( + VIRTUAL_PATH_PREFIX, + ensure_thread_dirs, + resolve_virtual_path, + sandbox_outputs_dir, + sandbox_uploads_dir, + sandbox_user_data_dir, + sandbox_workspace_dir, + virtual_path_for_thread_file, +) +from .provider import ( + ProvisionerSandboxProvider, + get_sandbox_provider, + init_sandbox_provider, + sandbox_id_for_thread, + shutdown_sandbox_provider, +) + +__all__ = [ + "ProvisionerSandboxBackend", + "ProvisionerSandboxProvider", + "VIRTUAL_PATH_PREFIX", + "ensure_thread_dirs", + "get_sandbox_provider", + "init_sandbox_provider", + "resolve_virtual_path", + "sandbox_id_for_thread", + "sandbox_outputs_dir", + "sandbox_uploads_dir", + "sandbox_user_data_dir", + "sandbox_workspace_dir", + "shutdown_sandbox_provider", + "virtual_path_for_thread_file", +] diff --git a/src/sandbox/backend.py b/src/sandbox/backend.py new file mode 100644 index 000000000..cfe02dee3 --- /dev/null +++ b/src/sandbox/backend.py @@ -0,0 +1,359 @@ +from __future__ import annotations + +import base64 +from datetime import datetime +from pathlib import PurePosixPath +from typing import Any + +from deepagents.backends.protocol import ( + EditResult, + ExecuteResponse, + FileDownloadResponse, + FileInfo, + FileUploadResponse, + GrepMatch, + WriteResult, +) +from deepagents.backends.sandbox import BaseSandbox + +from src import config as conf +from src.utils.logging_config import logger + +from .provider import get_sandbox_provider, sandbox_id_for_thread + + +def _normalize_path(path: str) -> str: + raw = str(path or "").strip() + if not raw: + raise ValueError("path is required") + normalized = "/" + raw.lstrip("/") + pure = PurePosixPath(normalized) + if ".." in pure.parts: + raise ValueError("path traversal is not allowed") + return str(pure) + + +class ProvisionerSandboxBackend(BaseSandbox): + def __init__(self, thread_id: str): + self._thread_id = str(thread_id or "").strip() + if not self._thread_id: + raise ValueError("thread_id is required for ProvisionerSandboxBackend") + + self._provider = get_sandbox_provider() + self._id = sandbox_id_for_thread(self._thread_id) + self._client: Any | None = None + self._client_url: str | None = None + self._command_timeout_seconds = int(getattr(conf, "sandbox_exec_timeout_seconds", 180)) + self._max_output_bytes = int(getattr(conf, "sandbox_max_output_bytes", 262_144)) + + @property + def id(self) -> str: + return self._id + + def _build_client(self, sandbox_url: str): + try: + from agent_sandbox import Sandbox as AgentSandboxClient + except Exception as exc: # noqa: BLE001 + raise RuntimeError( + "agent-sandbox is required. Install dependency `agent-sandbox` in the docker image." + ) from exc + + return AgentSandboxClient(base_url=sandbox_url, timeout=self._command_timeout_seconds) + + def _get_client(self) -> Any: + connection = self._provider.get(self._thread_id, create_if_missing=True) + if connection is None: + raise RuntimeError(f"sandbox is unavailable for thread {self._thread_id}") + + if self._client is None or self._client_url != connection.sandbox_url: + self._client = self._build_client(connection.sandbox_url) + self._client_url = connection.sandbox_url + + return self._client + + def _read_binary(self, path: str, offset: int = 0, limit: int | None = None) -> bytes: + """Read file content from the sandbox file API and normalize it to bytes. + + The underlying API may return base64 text, raw bytes, or plain strings. 
+ This helper is the single normalization point used by read(), edit(), and + download_files() so all read paths share the same transport semantics. """ + start_line = max(0, int(offset)) + # Honor limit even when offset == 0 (start_line=0 means the file start). + end_line = start_line + int(limit) if limit else None + + result = self._get_client().file.read_file( + file=path, + start_line=start_line, + end_line=end_line, + ) + + content = result.data.content + if content is None: + return b"" + if isinstance(content, bytes): + return content + if not isinstance(content, str): + return str(content).encode("utf-8") + + try: + return base64.b64decode(content, validate=True) + except Exception: # noqa: BLE001 + return content.encode("utf-8") + + def read( + self, + file_path: str, + offset: int = 0, + limit: int = 2000, + ) -> str: + """Read file content via the sandbox file API and render a text view. + + This stays on top of _read_binary() so the backend has one consistent + read path for base64 transport, raw bytes, and text-like responses. + """ + normalized_path = _normalize_path(file_path) + start = max(0, int(offset)) + + try: + content = self._read_binary(normalized_path, offset=offset, limit=limit) + except Exception: # noqa: BLE001 + return f"Error: File '{file_path}' not found" + + if not content: + return "System reminder: File exists but has empty contents" + + text = content.decode("utf-8", errors="replace") + if not text: + return "" + + lines = text.splitlines() + return "\n".join(f"{start + idx + 1:6d}\t{line}" for idx, line in enumerate(lines)) + + def execute(self, command: str, *, timeout: int | None = None) -> ExecuteResponse: + """Execute a shell command in the sandbox. + + Output is normalized to text and truncated to the configured maximum + payload size before being returned.
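The decode fallback in `_read_binary()` above is deliberately permissive. A standalone sketch of the same normalization order (bytes pass through, strict base64 is tried next, anything else is treated as UTF-8 text):

import base64

def normalize_payload(content: bytes | str | None) -> bytes:
    # Same precedence as _read_binary(): None -> empty, bytes -> as-is,
    # valid base64 text -> decoded, anything else -> UTF-8 bytes.
    if content is None:
        return b""
    if isinstance(content, bytes):
        return content
    try:
        return base64.b64decode(str(content), validate=True)
    except Exception:
        return str(content).encode("utf-8")

assert normalize_payload(b"\x00\x01") == b"\x00\x01"
assert normalize_payload("aGVsbG8=") == b"hello"           # valid base64
assert normalize_payload("hello world") == b"hello world"  # space fails validation
# Caveat inherited from the original: plain text that happens to be valid
# base64 is decoded as base64.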
+ """ + try: + kwargs: dict[str, Any] = {"command": command} + if timeout is not None: + kwargs["timeout"] = timeout + result = self._get_client().shell.exec_command(**kwargs) + + output = result.data.output or "" + exit_code = result.data.exit_code + + truncated = False + encoded = output.encode("utf-8", errors="ignore") + if len(encoded) > self._max_output_bytes: + output = encoded[: self._max_output_bytes].decode("utf-8", errors="ignore") + truncated = True + + return ExecuteResponse( + output=output, + exit_code=exit_code if isinstance(exit_code, int) else None, + truncated=truncated, + ) + except Exception as exc: # noqa: BLE001 + logger.error(f"Sandbox execute failed for thread {self._thread_id}: {exc}") + return ExecuteResponse(output=f"Error: {exc}", exit_code=1, truncated=False) + + def ls_info(self, path: str) -> list[FileInfo]: + """List direct children of a sandbox path with lightweight metadata.""" + normalized_path = _normalize_path(path) + try: + result = self._get_client().file.list_path(path=normalized_path, recursive=False, include_size=True) + except Exception: # noqa: BLE001 + return [] + + entries = result.data.files or [] + infos: list[FileInfo] = [] + for entry in entries: + info: FileInfo = {"path": entry.path, "is_dir": entry.is_directory} + size = entry.size + if isinstance(size, int): + info["size"] = size + modified_time = entry.modified_time + if modified_time: + if isinstance(modified_time, str) and modified_time.isdigit(): + info["modified_at"] = datetime.fromtimestamp(int(modified_time)).isoformat() + elif isinstance(modified_time, str): + try: + info["modified_at"] = datetime.fromisoformat(modified_time).isoformat() + except ValueError: + info["modified_at"] = modified_time + elif isinstance(modified_time, (int, float)): + info["modified_at"] = datetime.fromtimestamp(modified_time).isoformat() + infos.append(info) + return infos + + def write(self, file_path: str, content: str) -> WriteResult: + """Write a new text file. + + This method is intentionally text-only. Binary payloads should go through + upload_files(), which uses base64 encoding for the sandbox file API. + """ + normalized_path = _normalize_path(file_path) + if not isinstance(content, str): + return WriteResult(error="Error: write() only supports text content; use upload_files() for binary data") + try: + self._read_binary(normalized_path) + except Exception: # noqa: BLE001 + pass + else: + return WriteResult(error=f"Error: File '{file_path}' already exists") + + try: + result = self._get_client().file.write_file(file=normalized_path, content=content) + if not result.success: + return WriteResult(error=result.message or f"Failed to write file '{file_path}'") + except Exception as exc: # noqa: BLE001 + return WriteResult(error=str(exc) or f"Failed to write file '{file_path}'") + + return WriteResult(path=normalized_path, files_update=None) + + def edit( + self, + file_path: str, + old_string: str, + new_string: str, + replace_all: bool = False, # noqa: FBT001, FBT002 + ) -> EditResult: + """Edit an existing text file by replacing string content. + + This method operates on UTF-8-decoded text content only. Binary files + are not supported here and should be handled via download/upload flows. 
+ """ + normalized_path = _normalize_path(file_path) + + # Check if old_string exists + try: + text = self._read_binary(normalized_path).decode("utf-8", errors="replace") + except Exception: # noqa: BLE001 + return EditResult(error=f"Error: File '{file_path}' not found") + + count = text.count(old_string) + if count == 0: + return EditResult(error=f"Error: String not found in file: '{old_string}'") + if count > 1 and not replace_all: + return EditResult( + error=( + f"Error: String '{old_string}' appears multiple times. " + "Use replace_all=True to replace all occurrences." + ) + ) + + # Use str_replace_editor API + replace_mode = "ALL" if replace_all else "FIRST" + try: + result = self._get_client().file.str_replace_editor( + command="str_replace", + path=normalized_path, + old_str=old_string, + new_str=new_string, + replace_mode=replace_mode, + ) + if not result.data.success: + return EditResult(error=result.data.message or f"Error editing file '{file_path}'") + except Exception as exc: # noqa: BLE001 + return EditResult(error=f"Error editing file: {exc}") + + return EditResult(path=normalized_path, files_update=None, occurrences=count if replace_all else 1) + + def grep_raw( + self, + pattern: str, + path: str | None = None, + glob: str | None = None, + ) -> list[GrepMatch] | str: + """Search file contents under a path and return raw line matches. + + The sandbox file API is used directly with fixed-string matching and an + optional include glob. + """ + search_path = _normalize_path(path or "/") + + try: + return super().grep_raw(pattern=pattern, path=search_path, glob=glob) + + except Exception as exc: # noqa: BLE001 + return str(exc) + + def glob_info(self, pattern: str, path: str = "/") -> list[FileInfo]: + """Return files matching a glob pattern with optional metadata.""" + normalized_path = _normalize_path(path) + + try: + # return super().glob_info(pattern=pattern, path=path) + result = self._get_client().file.find_files( + path=normalized_path, + glob=pattern, + ) + except Exception: # noqa: BLE001 + return [] + + infos: list[FileInfo] = [] + for file_path in result.data.files or []: + infos.append({"path": file_path}) + return infos + + def upload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]: + """Upload binary or text file payloads via the sandbox file API. + + Contents are base64-encoded before calling the remote write_file API so + arbitrary bytes can be transferred safely. 
+ """ + responses: list[FileUploadResponse] = [] + for path, content in files: + try: + normalized_path = _normalize_path(path) + result = self._get_client().file.write_file( + file=normalized_path, + content=base64.b64encode(content).decode("ascii"), + encoding="base64", + ) + if not result.success: + raise Exception(result.message or "Upload failed") + responses.append(FileUploadResponse(path=normalized_path, error=None)) + except PermissionError: + normalized_path = str(path) + responses.append(FileUploadResponse(path=normalized_path, error="permission_denied")) + except IsADirectoryError: + normalized_path = str(path) + responses.append(FileUploadResponse(path=normalized_path, error="is_directory")) + except FileNotFoundError: + normalized_path = str(path) + responses.append(FileUploadResponse(path=normalized_path, error="file_not_found")) + except Exception as exc: # noqa: BLE001 + normalized_path = str(path) + logger.warning(f"Upload to sandbox failed for {normalized_path}: {exc}") + responses.append(FileUploadResponse(path=normalized_path, error="invalid_path")) + return responses + + def download_files(self, paths: list[str]) -> list[FileDownloadResponse]: + """Download file payloads as raw bytes from the sandbox file API. + + The underlying API is read with base64 encoding and decoded back into + bytes by _read_binary(). + """ + responses: list[FileDownloadResponse] = [] + for path in paths: + try: + normalized_path = _normalize_path(path) + content = self._read_binary(normalized_path) + responses.append(FileDownloadResponse(path=normalized_path, content=content, error=None)) + except PermissionError: + normalized_path = str(path) + responses.append(FileDownloadResponse(path=normalized_path, content=None, error="permission_denied")) + except IsADirectoryError: + normalized_path = str(path) + responses.append(FileDownloadResponse(path=normalized_path, content=None, error="is_directory")) + except FileNotFoundError: + normalized_path = str(path) + responses.append(FileDownloadResponse(path=normalized_path, content=None, error="file_not_found")) + except Exception as exc: # noqa: BLE001 + normalized_path = str(path) + logger.warning(f"Download from sandbox failed for {normalized_path}: {exc}") + responses.append(FileDownloadResponse(path=normalized_path, content=None, error="invalid_path")) + return responses diff --git a/src/sandbox/paths.py b/src/sandbox/paths.py new file mode 100644 index 000000000..d0cef160c --- /dev/null +++ b/src/sandbox/paths.py @@ -0,0 +1,88 @@ +from __future__ import annotations + +import re +from pathlib import Path + +from src import config as conf + +DEFAULT_VIRTUAL_PATH_PREFIX = "/mnt/user-data" +VIRTUAL_PATH_PREFIX = DEFAULT_VIRTUAL_PATH_PREFIX + +_SAFE_THREAD_ID_RE = re.compile(r"^[A-Za-z0-9_-]+$") + + +def get_virtual_path_prefix() -> str: + configured = str(getattr(conf, "sandbox_virtual_path_prefix", "") or "").strip() + if not configured: + return DEFAULT_VIRTUAL_PATH_PREFIX + return "/" + configured.strip("/") + + +def _validate_thread_id(thread_id: str) -> str: + value = str(thread_id or "").strip() + if not value: + raise ValueError("thread_id is required") + if not _SAFE_THREAD_ID_RE.match(value): + raise ValueError("thread_id contains invalid characters") + return value + + +def _thread_root_dir(thread_id: str) -> Path: + safe_thread_id = _validate_thread_id(thread_id) + return Path(conf.save_dir) / "threads" / safe_thread_id / "user-data" + + +def sandbox_user_data_dir(thread_id: str) -> Path: + return _thread_root_dir(thread_id) + + +def 
sandbox_workspace_dir(thread_id: str) -> Path: + return _thread_root_dir(thread_id) / "workspace" + + +def sandbox_uploads_dir(thread_id: str) -> Path: + return _thread_root_dir(thread_id) / "uploads" + + +def sandbox_outputs_dir(thread_id: str) -> Path: + return _thread_root_dir(thread_id) / "outputs" + + +def ensure_thread_dirs(thread_id: str) -> None: + sandbox_workspace_dir(thread_id).mkdir(parents=True, exist_ok=True) + sandbox_uploads_dir(thread_id).mkdir(parents=True, exist_ok=True) + sandbox_outputs_dir(thread_id).mkdir(parents=True, exist_ok=True) + + +def resolve_virtual_path(thread_id: str, virtual_path: str) -> Path: + clean_virtual_path = "/" + str(virtual_path or "").strip().lstrip("/") + virtual_prefix = get_virtual_path_prefix() + + if clean_virtual_path != virtual_prefix and not clean_virtual_path.startswith(f"{virtual_prefix}/"): + raise ValueError(f"path must start with {virtual_prefix}") + + relative_path = clean_virtual_path[len(virtual_prefix) :].lstrip("/") + base_dir = sandbox_user_data_dir(thread_id).resolve() + target_path = (base_dir / relative_path).resolve() + + try: + target_path.relative_to(base_dir) + except ValueError as exc: + raise ValueError("path traversal detected") from exc + + return target_path + + +def virtual_path_for_thread_file(thread_id: str, path: str | Path) -> str: + base_dir = sandbox_user_data_dir(thread_id).resolve() + target_path = Path(path).resolve() + + try: + relative_path = target_path.relative_to(base_dir) + except ValueError as exc: + raise ValueError("file is outside thread user-data directory") from exc + + prefix = get_virtual_path_prefix().rstrip("/") + if not str(relative_path): + return prefix + return f"{prefix}/{relative_path.as_posix()}" diff --git a/src/sandbox/provider.py b/src/sandbox/provider.py new file mode 100644 index 000000000..c2503b17d --- /dev/null +++ b/src/sandbox/provider.py @@ -0,0 +1,168 @@ +from __future__ import annotations + +import hashlib +import threading +import time +from dataclasses import dataclass + +from src import config as conf +from src.utils.logging_config import logger + +from .provisioner_client import ProvisionerClient, SandboxRecord + + +def sandbox_id_for_thread(thread_id: str) -> str: + digest = hashlib.sha256(thread_id.encode("utf-8")).hexdigest() + return digest[:12] + + +@dataclass(slots=True) +class SandboxConnection: + thread_id: str + sandbox_id: str + sandbox_url: str + + +class ProvisionerSandboxProvider: + def __init__(self): + provider_name = str(getattr(conf, "sandbox_provider", "provisioner")).strip().lower() + if provider_name != "provisioner": + raise RuntimeError("only sandbox_provider=provisioner is supported") + + provisioner_url = str(getattr(conf, "sandbox_provisioner_url", "") or "").strip() + if not provisioner_url: + raise RuntimeError("sandbox_provisioner_url is required") + + self._client = ProvisionerClient(provisioner_url) + self._lock = threading.Lock() + self._thread_locks: dict[str, threading.Lock] = {} + self._connections: dict[str, SandboxConnection] = {} + self._last_touch_at: dict[str, float] = {} + self._touch_interval_seconds = int(getattr(conf, "sandbox_keepalive_interval_seconds", 30)) + + def _thread_lock(self, thread_id: str) -> threading.Lock: + with self._lock: + lock = self._thread_locks.get(thread_id) + if lock is None: + lock = threading.Lock() + self._thread_locks[thread_id] = lock + return lock + + def _record_to_connection(self, thread_id: str, record: SandboxRecord) -> SandboxConnection: + connection = SandboxConnection( + 
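Worth pausing on the two path helpers in paths.py above: `resolve_virtual_path` maps a virtual `/mnt/user-data/...` path onto the thread's on-disk directory with a containment check, and `virtual_path_for_thread_file` inverts the mapping. A hedged round-trip sketch, assuming `conf.save_dir == "/app/saves"` and thread id "t1":

actual = resolve_virtual_path("t1", "/mnt/user-data/outputs/report.md")
# -> Path("/app/saves/threads/t1/user-data/outputs/report.md")
assert virtual_path_for_thread_file("t1", actual) == "/mnt/user-data/outputs/report.md"
try:
    resolve_virtual_path("t1", "/mnt/user-data/../etc/passwd")
except ValueError:
    pass  # rejected: the resolved target escapes the thread root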
thread_id=thread_id, + sandbox_id=record.sandbox_id, + sandbox_url=record.sandbox_url, + ) + self._connections[thread_id] = connection + self._last_touch_at[thread_id] = time.time() + return connection + + def _should_touch(self, thread_id: str) -> bool: + if self._touch_interval_seconds <= 0: + return False + last_touch = self._last_touch_at.get(thread_id) + if last_touch is None: + return True + return (time.time() - last_touch) >= self._touch_interval_seconds + + def _touch_if_needed(self, connection: SandboxConnection) -> bool: + if not self._should_touch(connection.thread_id): + return True + is_alive = self._client.touch(connection.sandbox_id) + self._last_touch_at[connection.thread_id] = time.time() + return is_alive + + def acquire(self, thread_id: str) -> str: + lock = self._thread_lock(thread_id) + with lock: + current = self._connections.get(thread_id) + if current: + try: + if self._touch_if_needed(current): + return current.sandbox_id + self._connections.pop(thread_id, None) + self._last_touch_at.pop(thread_id, None) + except Exception as exc: # noqa: BLE001 + logger.warning(f"Failed to touch sandbox {current.sandbox_id} for thread {thread_id}: {exc}") + return current.sandbox_id + + sandbox_id = sandbox_id_for_thread(thread_id) + record = self._client.discover(sandbox_id) + if record is None: + logger.info(f"Creating sandbox {sandbox_id} for thread {thread_id}") + record = self._client.create(sandbox_id, thread_id) + else: + logger.info(f"Reusing sandbox {sandbox_id} for thread {thread_id}") + + connection = self._record_to_connection(thread_id, record) + return connection.sandbox_id + + def get(self, thread_id: str, *, create_if_missing: bool = False) -> SandboxConnection | None: + lock = self._thread_lock(thread_id) + with lock: + current = self._connections.get(thread_id) + if current: + try: + if self._touch_if_needed(current): + return current + self._connections.pop(thread_id, None) + self._last_touch_at.pop(thread_id, None) + except Exception as exc: # noqa: BLE001 + logger.warning(f"Failed to touch sandbox {current.sandbox_id} for thread {thread_id}: {exc}") + return current + + current = self._connections.get(thread_id) + if current: + return current + + sandbox_id = sandbox_id_for_thread(thread_id) + record = self._client.discover(sandbox_id) + if record is None: + if not create_if_missing: + return None + record = self._client.create(sandbox_id, thread_id) + + return self._record_to_connection(thread_id, record) + + def shutdown(self) -> None: + with self._lock: + connections = list(self._connections.values()) + self._connections.clear() + self._last_touch_at.clear() + + for connection in connections: + try: + self._client.delete(connection.sandbox_id) + except Exception as exc: # noqa: BLE001 + logger.warning( + f"Failed to release sandbox {connection.sandbox_id} for thread {connection.thread_id}: {exc}" + ) + + +_sandbox_provider: ProvisionerSandboxProvider | None = None +_sandbox_provider_lock = threading.Lock() + + +def init_sandbox_provider() -> ProvisionerSandboxProvider: + global _sandbox_provider + with _sandbox_provider_lock: + if _sandbox_provider is None: + _sandbox_provider = ProvisionerSandboxProvider() + return _sandbox_provider + + +def get_sandbox_provider() -> ProvisionerSandboxProvider: + provider = _sandbox_provider + if provider is not None: + return provider + return init_sandbox_provider() + + +def shutdown_sandbox_provider() -> None: + global _sandbox_provider + with _sandbox_provider_lock: + provider = _sandbox_provider + _sandbox_provider = 
None + if provider is not None: + provider.shutdown() diff --git a/src/sandbox/provisioner_client.py b/src/sandbox/provisioner_client.py new file mode 100644 index 000000000..19e7e4fec --- /dev/null +++ b/src/sandbox/provisioner_client.py @@ -0,0 +1,72 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import httpx + + +@dataclass(slots=True) +class SandboxRecord: + sandbox_id: str + sandbox_url: str + status: str | None = None + + +class ProvisionerClient: + def __init__(self, base_url: str, *, timeout_seconds: int = 20): + self._base_url = base_url.rstrip("/") + self._timeout = httpx.Timeout(timeout_seconds) + + def _request(self, method: str, path: str, **kwargs) -> httpx.Response: + return httpx.request( + method=method, + url=f"{self._base_url}{path}", + timeout=self._timeout, + **kwargs, + ) + + def health(self) -> bool: + response = self._request("GET", "/health") + return response.status_code == 200 + + def create(self, sandbox_id: str, thread_id: str) -> SandboxRecord: + response = self._request( + "POST", + "/api/sandboxes", + json={"sandbox_id": sandbox_id, "thread_id": thread_id}, + ) + if response.status_code >= 400: + raise RuntimeError(f"failed to create sandbox {sandbox_id}: {response.status_code} {response.text}") + payload = response.json() + return SandboxRecord( + sandbox_id=payload["sandbox_id"], + sandbox_url=payload["sandbox_url"], + status=payload.get("status"), + ) + + def discover(self, sandbox_id: str) -> SandboxRecord | None: + response = self._request("GET", f"/api/sandboxes/{sandbox_id}") + if response.status_code == 404: + return None + if response.status_code >= 400: + raise RuntimeError(f"failed to discover sandbox {sandbox_id}: {response.status_code} {response.text}") + payload = response.json() + return SandboxRecord( + sandbox_id=payload["sandbox_id"], + sandbox_url=payload["sandbox_url"], + status=payload.get("status"), + ) + + def touch(self, sandbox_id: str) -> bool: + response = self._request("POST", f"/api/sandboxes/{sandbox_id}/touch") + if response.status_code == 404: + return False + if response.status_code >= 400: + raise RuntimeError(f"failed to touch sandbox {sandbox_id}: {response.status_code} {response.text}") + return True + + def delete(self, sandbox_id: str) -> None: + response = self._request("DELETE", f"/api/sandboxes/{sandbox_id}") + if response.status_code in {200, 404}: + return + raise RuntimeError(f"failed to delete sandbox {sandbox_id}: {response.status_code} {response.text}") diff --git a/src/services/chat_stream_service.py b/src/services/chat_stream_service.py index 245b7e1de..7cab282e1 100644 --- a/src/services/chat_stream_service.py +++ b/src/services/chat_stream_service.py @@ -10,6 +10,7 @@ from langgraph.types import Command from src import config as conf +from src import knowledge_base from src.agents import agent_manager from src.plugins.guard import content_guard from src.repositories.agent_config_repository import AgentConfigRepository @@ -450,6 +451,27 @@ def make_chunk(content=None, **kwargs): # 先构建 langgraph_config langgraph_config = {"configurable": {"thread_id": thread_id, "user_id": user_id}} + # LangGraph 会自动从 checkpointer 恢复 state(包括 uploads) + # 无需手动加载或传递 + + # 根据用户权限过滤知识库 + requested_knowledge_names = input_context["agent_config"].get("knowledges") + logger.info(f"Requesting knowledges: {requested_knowledge_names}") + if requested_knowledge_names and isinstance(requested_knowledge_names, list) and requested_knowledge_names: + user_info = {"role": "user", "department_id": 
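Taken together, `ProvisionerClient` gives the provider a small discover/create/touch/delete lifecycle over HTTP. A hedged usage sketch (endpoint behavior as in the diff; the URL is the compose-internal default and the thread id is illustrative):

client = ProvisionerClient("http://sandbox-provisioner:8002")
sandbox_id = sandbox_id_for_thread("thread-123")  # 12-hex-char sha256 prefix

record = client.discover(sandbox_id)              # None on 404
if record is None:
    record = client.create(sandbox_id, "thread-123")

if not client.touch(record.sandbox_id):           # False once the sandbox is gone
    record = client.create(sandbox_id, "thread-123")

client.delete(record.sandbox_id)                  # 404 is treated as success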
department_id} + accessible_databases = await knowledge_base.get_databases_by_user(user_info) + accessible_kb_names = { + db.get("name") + for db in accessible_databases.get("databases", []) + if isinstance(db, dict) and db.get("name") + } + logger.info(f"Accessible knowledges: {accessible_kb_names}") + + filtered_knowledge_names = [kb for kb in requested_knowledge_names if kb in accessible_kb_names] + blocked_knowledge_names = [kb for kb in requested_knowledge_names if kb not in accessible_kb_names] + if blocked_knowledge_names: + logger.warning(f"用户 {user_id} 无权访问知识库: {blocked_knowledge_names}, 已自动过滤") + input_context["agent_config"]["knowledges"] = filtered_knowledge_names full_msg = None accumulated_content = [] async for msg, metadata in agent.stream_messages(messages, input_context=input_context): diff --git a/src/services/conversation_service.py b/src/services/conversation_service.py index 115365718..d9440657d 100644 --- a/src/services/conversation_service.py +++ b/src/services/conversation_service.py @@ -1,22 +1,23 @@ +import shlex import uuid -from datetime import UTC, datetime +from pathlib import Path from fastapi import HTTPException, UploadFile from sqlalchemy.ext.asyncio import AsyncSession from src.agents import agent_manager from src.repositories.conversation_repository import ConversationRepository -from src.services.doc_converter import ( - ATTACHMENT_ALLOWED_EXTENSIONS, - MAX_ATTACHMENT_SIZE_BYTES, - convert_upload_to_markdown, +from src.sandbox import ( + ProvisionerSandboxBackend, + ensure_thread_dirs, + get_sandbox_provider, + sandbox_uploads_dir, ) -from src.storage.minio.client import get_minio_client +from src.services.doc_converter import ATTACHMENT_ALLOWED_EXTENSIONS, MAX_ATTACHMENT_SIZE_BYTES from src.utils.datetime_utils import utc_isoformat from src.utils.logging_config import logger -# 附件存储桶名称 -ATTACHMENTS_BUCKET = "chat-attachments" +UPLOADS_VIRTUAL_PREFIX = "/mnt/user-data/uploads" async def require_user_conversation(conv_repo: ConversationRepository, thread_id: str, user_id: str): @@ -26,44 +27,38 @@ async def require_user_conversation(conv_repo: ConversationRepository, thread_id return conversation -def _make_attachment_path(file_name: str) -> str: - """生成附件在文件系统中的路径(无需 thread_id,state 已隔离) +def _make_upload_virtual_path(file_name: str) -> str: + safe_name = file_name.replace("/", "_").replace("\\", "_").strip(" .") + return f"{UPLOADS_VIRTUAL_PREFIX}/{safe_name or 'attachment.bin'}" - 统一使用 .md 扩展名,因为文件内容已经是 Markdown 格式 - """ - # 提取不带扩展名的部分 - base_name = file_name - for ext in [".docx", ".txt", ".html", ".htm", ".pdf", ".md"]: - if file_name.lower().endswith(ext): - base_name = file_name[: -len(ext)] - break - # 替换路径分隔符 - safe_name = base_name.replace("/", "_").replace("\\", "_") - return f"/attachments/{safe_name}.md" +def _artifact_url(thread_id: str, virtual_path: str) -> str: + return f"/api/chat/thread/{thread_id}/artifacts/{virtual_path.lstrip('/')}" -def _build_state_files(attachments: list[dict]) -> dict: - files = {} +def _build_state_uploads(attachments: list[dict]) -> list[dict]: + uploads: list[dict] = [] for attachment in attachments: - if attachment.get("status") != "parsed": + path = attachment.get("path") + if not isinstance(path, str) or not path.strip(): continue - file_path = attachment.get("file_path") - markdown = attachment.get("markdown") - if not file_path or not markdown: - continue - - now = datetime.now(UTC).isoformat() - files[file_path] = { - "content": markdown.split("\n"), - "created_at": attachment.get("uploaded_at", now), 
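The knowledge-base guard above reduces to a set-membership filter over the names the user may access. In isolation (values illustrative):

requested = ["hr-docs", "finance", "public-faq"]
accessible = {"public-faq", "hr-docs"}  # from get_databases_by_user()

filtered = [name for name in requested if name in accessible]
blocked = [name for name in requested if name not in accessible]

assert filtered == ["hr-docs", "public-faq"]
assert blocked == ["finance"]  # logged as a warning, then dropped from the request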
- "modified_at": attachment.get("uploaded_at", now), - } - return files + uploads.append( + { + "file_id": attachment.get("file_id"), + "file_name": attachment.get("file_name"), + "file_type": attachment.get("file_type"), + "file_size": attachment.get("file_size", 0), + "status": attachment.get("status", "uploaded"), + "uploaded_at": attachment.get("uploaded_at"), + "path": path, + "artifact_url": attachment.get("artifact_url"), + } + ) + return uploads -async def _sync_thread_attachment_state( +async def _sync_thread_upload_state( *, thread_id: str, user_id: str, @@ -73,53 +68,34 @@ async def _sync_thread_attachment_state( try: agent = agent_manager.get_agent(agent_id) if not agent: - logger.warning(f"Skip attachment state sync: agent not found ({agent_id})") + logger.warning(f"Skip upload state sync: agent not found ({agent_id})") return graph = await agent.get_graph() config = {"configurable": {"thread_id": thread_id, "user_id": str(user_id)}} - # 先获取现有 state,保留非附件文件 - state = await graph.aget_state(config) - state_values = getattr(state, "values", {}) if state else {} - existing_files = state_values.get("files", {}) if isinstance(state_values, dict) else {} - if not isinstance(existing_files, dict): - existing_files = {} - - # 仅对 /attachments 命名空间做增量更新,避免覆盖 agent 运行期生成的其它文件。 - next_attachment_files = _build_state_files(attachments) - prev_attachment_paths = { - path for path in existing_files.keys() if isinstance(path, str) and path.startswith("/attachments/") - } - next_attachment_paths = set(next_attachment_files.keys()) - - file_updates: dict[str, dict | None] = {**next_attachment_files} - for removed_path in prev_attachment_paths - next_attachment_paths: - file_updates[removed_path] = None - - # 使用 Command 确保 reducer 被正确应用 await graph.aupdate_state( config=config, values={ - "attachments": attachments, - "files": file_updates, + "uploads": _build_state_uploads(attachments), }, ) - except Exception as e: - logger.warning(f"Failed to sync attachment state for thread {thread_id}: {e}") + except Exception as exc: # noqa: BLE001 + logger.warning(f"Failed to sync upload state for thread {thread_id}: {exc}") def serialize_attachment(record: dict) -> dict: - """序列化附件记录,返回给前端""" + path = record.get("path") return { "file_id": record.get("file_id"), "file_name": record.get("file_name"), "file_type": record.get("file_type"), "file_size": record.get("file_size", 0), - "status": record.get("status", "parsed"), + "status": record.get("status", "uploaded"), "uploaded_at": record.get("uploaded_at"), - "truncated": record.get("truncated", False), - "minio_url": record.get("minio_url"), # 仅用于前端下载 + "path": path, + "artifact_url": record.get("artifact_url"), + "minio_url": record.get("minio_url"), } @@ -229,52 +205,51 @@ async def upload_thread_attachment_view( ) -> dict: conv_repo = ConversationRepository(db) conversation = await require_user_conversation(conv_repo, thread_id, str(current_user_id)) - - try: - conversion = await convert_upload_to_markdown(file) - except ValueError as exc: - raise HTTPException(status_code=400, detail=str(exc)) from exc - except Exception as exc: - logger.error(f"附件解析失败: {exc}") - raise HTTPException(status_code=500, detail="附件解析失败,请稍后重试") from exc - - # 生成文件路径 - file_path = _make_attachment_path(conversion.file_name) - - # 上传源文件到 MinIO(用于前端下载) - minio_url = None - try: - file_content = await file.read() - await file.seek(0) - client = get_minio_client() - object_name = f"attachments/{thread_id}/{conversion.file_name}" - result = client.upload_file( - 
bucket_name=ATTACHMENTS_BUCKET, - object_name=object_name, - data=file_content, - content_type=conversion.file_type or "application/octet-stream", + if not file.filename: + raise HTTPException(status_code=400, detail="无法识别的文件名") + + file_name = Path(file.filename).name + await file.seek(0) + file_content = await file.read() + file_size = len(file_content) + if file_size > MAX_ATTACHMENT_SIZE_BYTES: + max_size_mb = MAX_ATTACHMENT_SIZE_BYTES // (1024 * 1024) + raise HTTPException(status_code=400, detail=f"附件过大,当前仅支持 {max_size_mb} MB 以内的文件") + + upload_virtual_path = _make_upload_virtual_path(file_name) + artifact_url = _artifact_url(thread_id, upload_virtual_path) + + ensure_thread_dirs(thread_id) + uploads_dir = sandbox_uploads_dir(thread_id) + upload_actual_path = uploads_dir / Path(upload_virtual_path).name + upload_actual_path.write_bytes(file_content) + + provider = get_sandbox_provider() + connection = provider.get(thread_id, create_if_missing=False) + if connection is not None: + backend = ProvisionerSandboxBackend(thread_id=thread_id) + backend.upload_files( + [ + (upload_virtual_path, file_content), + ] ) - minio_url = result.public_url - logger.info(f"Uploaded attachment to MinIO: {object_name}") - except Exception as e: - logger.error(f"Failed to upload attachment to MinIO: {e}") - # 继续处理,不因为上传失败而中断 attachment_record = { - "file_id": conversion.file_id, - "file_name": conversion.file_name, - "file_type": conversion.file_type, - "file_size": conversion.file_size, - "status": "parsed", - "markdown": conversion.markdown, + "file_id": uuid.uuid4().hex, + "file_name": file_name, + "file_type": file.content_type, + "file_size": file_size, + "status": "uploaded", "uploaded_at": utc_isoformat(), - "truncated": conversion.truncated, - "file_path": file_path, # 用于 StateBackend,前端不返回此字段 - "minio_url": minio_url, # 暂未使用 + "path": upload_virtual_path, + "artifact_url": artifact_url, + "minio_url": None, + "storage_path": str(upload_actual_path), } + await conv_repo.add_attachment(conversation.id, attachment_record) all_attachments = await conv_repo.get_attachments(conversation.id) - await _sync_thread_attachment_state( + await _sync_thread_upload_state( thread_id=thread_id, user_id=str(current_user_id), agent_id=conversation.agent_id, @@ -311,14 +286,42 @@ async def delete_thread_attachment_view( ) -> dict: conv_repo = ConversationRepository(db) conversation = await require_user_conversation(conv_repo, thread_id, str(current_user_id)) + + existing_attachments = await conv_repo.get_attachments(conversation.id) + target_attachment = next((item for item in existing_attachments if item.get("file_id") == file_id), None) + removed = await conv_repo.remove_attachment(conversation.id, file_id) if not removed: raise HTTPException(status_code=404, detail="附件不存在或已被删除") + + if target_attachment: + candidate = target_attachment.get("storage_path") + if candidate: + try: + file_path = Path(candidate) + if file_path.exists(): + file_path.unlink() + except Exception as exc: # noqa: BLE001 + logger.warning(f"Failed to remove attachment file {candidate}: {exc}") + all_attachments = await conv_repo.get_attachments(conversation.id) - await _sync_thread_attachment_state( + await _sync_thread_upload_state( thread_id=thread_id, user_id=str(current_user_id), agent_id=conversation.agent_id, attachments=all_attachments, ) + + if target_attachment: + provider = get_sandbox_provider() + connection = provider.get(thread_id, create_if_missing=False) + if connection is not None: + backend = 
ProvisionerSandboxBackend(thread_id=thread_id) + delete_commands = [] + path = target_attachment.get("path") + if isinstance(path, str) and path.strip(): + delete_commands.append(f"rm -f {shlex.quote(path)}") + if delete_commands: + backend.execute(" && ".join(delete_commands)) + return {"message": "附件已删除"} diff --git a/src/services/doc_converter.py b/src/services/doc_converter.py index 4b0c60f3f..edd77692f 100644 --- a/src/services/doc_converter.py +++ b/src/services/doc_converter.py @@ -13,7 +13,7 @@ from src.knowledge.indexing import process_file_to_markdown from src.utils import logger -ATTACHMENT_ALLOWED_EXTENSIONS: tuple[str, ...] = (".txt", ".md", ".docx", ".html", ".htm") +ATTACHMENT_ALLOWED_EXTENSIONS: tuple[str, ...] = () MAX_ATTACHMENT_SIZE_BYTES = 5 * 1024 * 1024 # 5 MB MAX_ATTACHMENT_MARKDOWN_CHARS = 32_000 @@ -71,7 +71,7 @@ async def convert_upload_to_markdown(upload: UploadFile) -> ConversionResult: file_name = Path(upload.filename).name suffix = Path(file_name).suffix.lower() - if suffix not in ATTACHMENT_ALLOWED_EXTENSIONS: + if ATTACHMENT_ALLOWED_EXTENSIONS and suffix not in ATTACHMENT_ALLOWED_EXTENSIONS: allowed = ", ".join(ATTACHMENT_ALLOWED_EXTENSIONS) raise ValueError(f"不支持的文件类型: {suffix or '未知'},当前仅支持 {allowed}") diff --git a/src/services/thread_files_service.py b/src/services/thread_files_service.py new file mode 100644 index 000000000..8c216312b --- /dev/null +++ b/src/services/thread_files_service.py @@ -0,0 +1,174 @@ +from __future__ import annotations + +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +from fastapi import HTTPException + +from src import config as conf +from src.repositories.conversation_repository import ConversationRepository +from src.sandbox import ( + ensure_thread_dirs, + resolve_virtual_path, + sandbox_user_data_dir, + virtual_path_for_thread_file, +) +from src.services.conversation_service import require_user_conversation + + +def _get_virtual_root() -> str: + prefix = str(getattr(conf, "sandbox_virtual_path_prefix", "/mnt/user-data") or "/mnt/user-data") + return "/" + prefix.strip("/") + + +def _to_iso8601(timestamp: float | None) -> str | None: + if timestamp is None: + return None + return datetime.fromtimestamp(timestamp, tz=UTC).isoformat() + + +async def list_thread_files_view( + *, + thread_id: str, + current_user_id: str, + db, + path: str | None = None, + recursive: bool = False, +) -> dict: + conv_repo = ConversationRepository(db) + await require_user_conversation(conv_repo, thread_id, str(current_user_id)) + + ensure_thread_dirs(thread_id) + virtual_path = path or _get_virtual_root() + try: + actual_path = resolve_virtual_path(thread_id, virtual_path) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) from exc + + if not actual_path.exists(): + return {"path": virtual_path, "files": []} + if not actual_path.is_dir(): + raise HTTPException(status_code=400, detail="path must be a directory") + + if recursive: + return _list_files_recursive(thread_id, actual_path, virtual_path) + + entries: list[dict[str, Any]] = [] + for child in sorted(actual_path.iterdir(), key=lambda item: (not item.is_dir(), item.name.lower())): + stat = child.stat() + child_virtual_path = virtual_path_for_thread_file(thread_id, child) + entries.append( + { + "path": child_virtual_path, + "name": child.name, + "is_dir": child.is_dir(), + "size": stat.st_size if child.is_file() else 0, + "modified_at": _to_iso8601(stat.st_mtime), + "artifact_url": None + if child.is_dir() + else 
f"/api/chat/thread/{thread_id}/artifacts/{child_virtual_path.lstrip('/')}", + } + ) + + return {"path": virtual_path, "files": entries} + + +def _list_files_recursive(thread_id: str, actual_path: Path, virtual_path: str) -> dict: + entries: list[dict[str, Any]] = [] + + def _scan_dir(base_actual_path: Path, base_virtual_path: str): + try: + for child in sorted(base_actual_path.iterdir(), key=lambda item: (not item.is_dir(), item.name.lower())): + stat = child.stat() + child_virtual_path = virtual_path_for_thread_file(thread_id, child) + entries.append( + { + "path": child_virtual_path, + "name": child.name, + "is_dir": child.is_dir(), + "size": stat.st_size if child.is_file() else 0, + "modified_at": _to_iso8601(stat.st_mtime), + "artifact_url": None + if child.is_dir() + else f"/api/chat/thread/{thread_id}/artifacts/{child_virtual_path.lstrip('/')}", + } + ) + if child.is_dir(): + _scan_dir(child, child_virtual_path) + except PermissionError: + pass + + _scan_dir(actual_path, virtual_path) + return {"path": virtual_path, "files": entries} + + +async def read_thread_file_content_view( + *, + thread_id: str, + current_user_id: str, + db, + path: str, + offset: int = 0, + limit: int = 2000, +) -> dict: + conv_repo = ConversationRepository(db) + await require_user_conversation(conv_repo, thread_id, str(current_user_id)) + + try: + actual_path = resolve_virtual_path(thread_id, path) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) from exc + + if not actual_path.exists(): + raise HTTPException(status_code=404, detail="file not found") + if not actual_path.is_file(): + raise HTTPException(status_code=400, detail="path must be a file") + + text = actual_path.read_text(encoding="utf-8", errors="replace") + lines = text.splitlines() + start = max(0, int(offset)) + count = min(max(1, int(limit)), 5000) + selected = lines[start : start + count] + + return { + "path": path, + "content": selected, + "offset": start, + "limit": count, + "total_lines": len(lines), + "artifact_url": f"/api/chat/thread/{thread_id}/artifacts/{path.lstrip('/')}", + } + + +async def resolve_thread_artifact_view( + *, + thread_id: str, + current_user_id: str, + db, + path: str, +) -> Path: + conv_repo = ConversationRepository(db) + await require_user_conversation(conv_repo, thread_id, str(current_user_id)) + + ensure_thread_dirs(thread_id) + + normalized = "/" + path.lstrip("/") + try: + actual_path = resolve_virtual_path(thread_id, normalized) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) from exc + + if not actual_path.exists(): + raise HTTPException(status_code=404, detail="artifact not found") + if not actual_path.is_file(): + raise HTTPException(status_code=400, detail="artifact path is not a file") + + # Additional guard to ensure path remains under thread root even if helper changes. 
+ thread_root = sandbox_user_data_dir(thread_id).resolve() + try: + actual_path.resolve().relative_to(thread_root) + except ValueError as exc: + raise HTTPException(status_code=403, detail="access denied") from exc + + return actual_path diff --git a/src/storage/postgres/manager.py b/src/storage/postgres/manager.py index cd67c8e5a..a87e24364 100644 --- a/src/storage/postgres/manager.py +++ b/src/storage/postgres/manager.py @@ -4,6 +4,7 @@ import os from contextlib import asynccontextmanager +from psycopg_pool import AsyncConnectionPool from sqlalchemy import text from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine from sqlalchemy.orm import declarative_base @@ -33,6 +34,7 @@ class PostgresManager(metaclass=SingletonMeta): def __init__(self): self.async_engine = None self.AsyncSession = None + self.langgraph_pool = None self._initialized = False def initialize(self): @@ -65,6 +67,21 @@ def initialize(self): expire_on_commit=False, ) + # ========================================== + # 2. 为 LangGraph 专门初始化一个原生 psycopg_pool + # ========================================== + # ⚠️ 注意:psycopg 不认识 "+asyncpg" 这样的 SQLAlchemy 方言标识。 + # 如果你的 db_url 是 "postgresql+asyncpg://user:pwd@host/db", + # 需要把它清洗成标准的 "postgresql://user:pwd@host/db" + langgraph_db_url = db_url.replace("+asyncpg", "").replace("+psycopg", "") + + # 创建 LangGraph 专属连接池 + self.langgraph_pool = AsyncConnectionPool( + conninfo=langgraph_db_url, + max_size=10, # 根据你的 Agent 并发情况设置,通常 5-10 足够了 + kwargs={"autocommit": True}, # LangGraph Checkpoint 强依赖 autocommit + ) + self._initialized = True logger.info(f"PostgreSQL manager initialized for knowledge base: {db_url.split('@')[0]}://***") except Exception as e: @@ -231,6 +248,9 @@ async def close(self): if self.async_engine: await self.async_engine.dispose() + if self.langgraph_pool: + await self.langgraph_pool.close() + async def async_check_first_run(self): """检查是否首次运行(异步版本)- 检查用户表是否有数据""" from sqlalchemy import func, select diff --git a/uv.lock b/uv.lock index bd3d54742..4b441b3d4 100644 --- a/uv.lock +++ b/uv.lock @@ -29,6 +29,56 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7e/46/02ac5e262d4af18054b3e922b2baedbb2a03289ee792162de60a865defc5/accelerate-1.13.0-py3-none-any.whl", hash = "sha256:cf1a3efb96c18f7b152eb0fa7490f3710b19c3f395699358f08decca2b8b62e0", size = 383744, upload-time = "2026-03-04T19:34:10.313Z" }, ] +[[package]] +name = "agent-sandbox" +version = "0.0.26" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +dependencies = [ + { name = "httpx", extra = ["socks"] }, + { name = "pydantic" }, + { name = "volcengine-python-sdk" }, +] +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3f/68/931c61b5e28ee344b5c2301bf56739f7fc90767da007c9e44a9b0a1ea937/agent_sandbox-0.0.26.tar.gz", hash = "sha256:67ec87e58794d017f6be321fc46913d76e84453d0cfe426a5f4b9bf8f5bd619b", size = 98635, upload-time = "2026-03-02T08:47:24.853Z" } +wheels = [ + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/26/b5/2748e86137ee757b749d2fb7f5504e039f780c9346be798870ae01ee1559/agent_sandbox-0.0.26-py2.py3-none-any.whl", hash = "sha256:6421fc4eb6144f10ddfdd2668c6644e132056eaeb6ab89d76a0b2e4bcf327a0a", size = 216489, upload-time = "2026-03-02T08:47:23.351Z" }, +] + +[[package]] +name = "aioboto3" +version = "15.5.0" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +dependencies = [ + { name = "aiobotocore", extra = ["boto3"] }, + { name = "aiofiles" }, +] +sdist = { url = 
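With the dedicated psycopg pool in place, the LangGraph checkpointer can be built directly on top of it. A hedged sketch of the wiring, assuming `AsyncPostgresSaver` from `langgraph.checkpoint.postgres.aio` (which accepts a `psycopg_pool.AsyncConnectionPool`); the conninfo value is illustrative:

from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
from psycopg_pool import AsyncConnectionPool

# Strip the SQLAlchemy dialect suffix first, exactly as the manager does.
conninfo = "postgresql+asyncpg://user:pwd@host/db".replace("+asyncpg", "")

pool = AsyncConnectionPool(conninfo=conninfo, max_size=10,
                           kwargs={"autocommit": True})
checkpointer = AsyncPostgresSaver(pool)
# await checkpointer.setup()  # one-time schema creation on first run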
"https://pypi.tuna.tsinghua.edu.cn/packages/a2/01/92e9ab00f36e2899315f49eefcd5b4685fbb19016c7f19a9edf06da80bb0/aioboto3-15.5.0.tar.gz", hash = "sha256:ea8d8787d315594842fbfcf2c4dce3bac2ad61be275bc8584b2ce9a3402a6979", size = 255069, upload-time = "2025-10-30T13:37:16.122Z" } +wheels = [ + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/3e/e8f5b665bca646d43b916763c901e00a07e40f7746c9128bdc912a089424/aioboto3-15.5.0-py3-none-any.whl", hash = "sha256:cc880c4d6a8481dd7e05da89f41c384dbd841454fc1998ae25ca9c39201437a6", size = 35913, upload-time = "2025-10-30T13:37:14.549Z" }, +] + +[[package]] +name = "aiobotocore" +version = "2.25.1" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "aioitertools" }, + { name = "botocore" }, + { name = "jmespath" }, + { name = "multidict" }, + { name = "python-dateutil" }, + { name = "wrapt" }, +] +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/62/94/2e4ec48cf1abb89971cb2612d86f979a6240520f0a659b53a43116d344dc/aiobotocore-2.25.1.tar.gz", hash = "sha256:ea9be739bfd7ece8864f072ec99bb9ed5c7e78ebb2b0b15f29781fbe02daedbc", size = 120560, upload-time = "2025-10-28T22:33:21.787Z" } +wheels = [ + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/95/2a/d275ec4ce5cd0096665043995a7d76f5d0524853c76a3d04656de49f8808/aiobotocore-2.25.1-py3-none-any.whl", hash = "sha256:eb6daebe3cbef5b39a0bb2a97cffbe9c7cb46b2fcc399ad141f369f3c2134b1f", size = 86039, upload-time = "2025-10-28T22:33:19.949Z" }, +] + +[package.optional-dependencies] +boto3 = [ + { name = "boto3" }, +] + [[package]] name = "aiofiles" version = "25.1.0" @@ -98,6 +148,15 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4e/f1/ab0395f8a79933577cdd996dd2f9aa6014af9535f65dddcf88204682fe62/aiohttp-3.13.3-cp313-cp313-win_amd64.whl", hash = "sha256:693781c45a4033d31d4187d2436f5ac701e7bbfe5df40d917736108c1cc7436e", size = 453899, upload-time = "2026-01-03T17:31:15.958Z" }, ] +[[package]] +name = "aioitertools" +version = "0.13.0" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/3c/53c4a17a05fb9ea2313ee1777ff53f5e001aefd5cc85aa2f4c2d982e1e38/aioitertools-0.13.0.tar.gz", hash = "sha256:620bd241acc0bbb9ec819f1ab215866871b4bbd1f73836a55f799200ee86950c", size = 19322, upload-time = "2025-11-06T22:17:07.609Z" } +wheels = [ + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/10/a1/510b0a7fadc6f43a6ce50152e69dbd86415240835868bb0bd9b5b88b1e06/aioitertools-0.13.0-py3-none-any.whl", hash = "sha256:0be0292b856f08dfac90e31f4739432f4cb6d7520ab9eb73e143f4f2fa5259be", size = 24182, upload-time = "2025-11-06T22:17:06.502Z" }, +] + [[package]] name = "aiosignal" version = "1.4.0" @@ -334,6 +393,34 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/95/c1/84fc6811122f54b20de2e5afb312ee07a3a47a328755587d1e505475239b/blockbuster-1.5.26-py3-none-any.whl", hash = "sha256:f8e53fb2dd4b6c6ec2f04907ddbd063ca7cd1ef587d24448ef4e50e81e3a79bb", size = 13226, upload-time = "2025-12-05T10:43:48.778Z" }, ] +[[package]] +name = "boto3" +version = "1.40.61" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +dependencies = [ + { name = "botocore" }, + { name = "jmespath" }, + { name = "s3transfer" }, +] +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ed/f9/6ef8feb52c3cce5ec3967a535a6114b57ac7949fd166b0f3090c2b06e4e5/boto3-1.40.61.tar.gz", hash = "sha256:d6c56277251adf6c2bdd25249feae625abe4966831676689ff23b4694dea5b12", size 
= 111535, upload-time = "2025-10-28T19:26:57.247Z" } +wheels = [ + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/61/24/3bf865b07d15fea85b63504856e137029b6acbc73762496064219cdb265d/boto3-1.40.61-py3-none-any.whl", hash = "sha256:6b9c57b2a922b5d8c17766e29ed792586a818098efe84def27c8f582b33f898c", size = 139321, upload-time = "2025-10-28T19:26:55.007Z" }, +] + +[[package]] +name = "botocore" +version = "1.40.61" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/28/a3/81d3a47c2dbfd76f185d3b894f2ad01a75096c006a2dd91f237dca182188/botocore-1.40.61.tar.gz", hash = "sha256:a2487ad69b090f9cccd64cf07c7021cd80ee9c0655ad974f87045b02f3ef52cd", size = 14393956, upload-time = "2025-10-28T19:26:46.108Z" } +wheels = [ + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/38/c5/f6ce561004db45f0b847c2cd9b19c67c6bf348a82018a48cb718be6b58b0/botocore-1.40.61-py3-none-any.whl", hash = "sha256:17ebae412692fd4824f99cde0f08d50126dc97954008e5ba2b522eb049238aa7", size = 14055973, upload-time = "2025-10-28T19:26:42.15Z" }, +] + [[package]] name = "bracex" version = "2.6" @@ -1408,6 +1495,11 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, ] +[package.optional-dependencies] +socks = [ + { name = "socksio" }, +] + [[package]] name = "httpx-sse" version = "0.4.3" @@ -1530,6 +1622,15 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/67/8a/a342b2f0251f3dac4ca17618265d93bf244a2a4d089126e81e4c1056ac50/jiter-0.13.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bb00b6d26db67a05fe3e12c76edc75f32077fb51deed13822dc648fa373bc19", size = 343768, upload-time = "2026-02-02T12:37:55.055Z" }, ] +[[package]] +name = "jmespath" +version = "1.1.0" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" } +wheels = [ + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" }, +] + [[package]] name = "joblib" version = "1.5.3" @@ -3302,6 +3403,43 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c8/5b/181e2e3becb7672b502f0ed7f16ed7352aca7c109cfb94cf3878a9186db9/psycopg-3.3.3-py3-none-any.whl", hash = "sha256:f96525a72bcfade6584ab17e89de415ff360748c766f0106959144dcbb38c698", size = 212768, upload-time = "2026-02-18T16:46:27.365Z" }, ] +[package.optional-dependencies] +binary = [ + { name = "psycopg-binary", marker = "implementation_name != 'pypy'" }, +] +pool = [ + { name = "psycopg-pool" }, +] + +[[package]] +name = "psycopg-binary" +version = "3.3.3" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +wheels = [ + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/90/15/021be5c0cbc5b7c1ab46e91cc3434eb42569f79a0592e67b8d25e66d844d/psycopg_binary-3.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6698dbab5bcef8fdb570fc9d35fd9ac52041771bfcfe6fd0fc5f5c4e36f1e99d", size = 4591170, upload-time = "2026-02-18T16:48:55.594Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f1/54/a60211c346c9a2f8c6b272b5f2bbe21f6e11800ce7f61e99ba75cf8b63e1/psycopg_binary-3.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:329ff393441e75f10b673ae99ab45276887993d49e65f141da20d915c05aafd8", size = 4670009, upload-time = "2026-02-18T16:49:03.608Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/53/ac7c18671347c553362aadbf65f92786eef9540676ca24114cc02f5be405/psycopg_binary-3.3.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:eb072949b8ebf4082ae24289a2b0fd724da9adc8f22743409d6fd718ddb379df", size = 5469735, upload-time = "2026-02-18T16:49:10.128Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7f/c3/4f4e040902b82a344eff1c736cde2f2720f127fe939c7e7565706f96dd44/psycopg_binary-3.3.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:263a24f39f26e19ed7fc982d7859a36f17841b05bebad3eb47bb9cd2dd785351", size = 5152919, upload-time = "2026-02-18T16:49:16.335Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0c/e7/d929679c6a5c212bcf738806c7c89f5b3d0919f2e1685a0e08d6ff877945/psycopg_binary-3.3.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5152d50798c2fa5bd9b68ec68eb68a1b71b95126c1d70adaa1a08cd5eefdc23d", size = 6738785, upload-time = "2026-02-18T16:49:22.687Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/69/b0/09703aeb69a9443d232d7b5318d58742e8ca51ff79f90ffe6b88f1db45e7/psycopg_binary-3.3.3-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9d6a1e56dd267848edb824dbeb08cf5bac649e02ee0b03ba883ba3f4f0bd54f2", size = 4979008, upload-time = "2026-02-18T16:49:27.313Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cc/a6/e662558b793c6e13a7473b970fee327d635270e41eded3090ef14045a6a5/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73eaaf4bb04709f545606c1db2f65f4000e8a04cdbf3e00d165a23004692093e", size = 4508255, upload-time = "2026-02-18T16:49:31.575Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5f/7f/0f8b2e1d5e0093921b6f324a948a5c740c1447fbb45e97acaf50241d0f39/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:162e5675efb4704192411eaf8e00d07f7960b679cd3306e7efb120bb8d9456cc", size = 4189166, upload-time = "2026-02-18T16:49:35.801Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/92/ec/ce2e91c33bc8d10b00c87e2f6b0fb570641a6a60042d6a9ae35658a3a797/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:fab6b5e37715885c69f5d091f6ff229be71e235f272ebaa35158d5a46fd548a0", size = 3924544, upload-time = "2026-02-18T16:49:41.129Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/2f/7718141485f73a924205af60041c392938852aa447a94c8cbd222ff389a1/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a4aab31bd6d1057f287c96c0effca3a25584eb9cc702f282ecb96ded7814e830", size = 4235297, upload-time = "2026-02-18T16:49:46.726Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/57/f9/1add717e2643a003bbde31b1b220172e64fbc0cb09f06429820c9173f7fc/psycopg_binary-3.3.3-cp312-cp312-win_amd64.whl", hash = 
"sha256:59aa31fe11a0e1d1bcc2ce37ed35fe2ac84cd65bb9036d049b1a1c39064d0f14", size = 3547659, upload-time = "2026-02-18T16:49:52.999Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/03/0a/cac9fdf1df16a269ba0e5f0f06cac61f826c94cadb39df028cdfe19d3a33/psycopg_binary-3.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05f32239aec25c5fb15f7948cffdc2dc0dac098e48b80a140e4ba32b572a2e7d", size = 4590414, upload-time = "2026-02-18T16:50:01.441Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9c/c0/d8f8508fbf440edbc0099b1abff33003cd80c9e66eb3a1e78834e3fb4fb9/psycopg_binary-3.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7c84f9d214f2d1de2fafebc17fa68ac3f6561a59e291553dfc45ad299f4898c1", size = 4669021, upload-time = "2026-02-18T16:50:08.803Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/04/05/097016b77e343b4568feddf12c72171fc513acef9a4214d21b9478569068/psycopg_binary-3.3.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:e77957d2ba17cada11be09a5066d93026cdb61ada7c8893101d7fe1c6e1f3925", size = 5467453, upload-time = "2026-02-18T16:50:14.985Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/91/23/73244e5feb55b5ca109cede6e97f32ef45189f0fdac4c80d75c99862729d/psycopg_binary-3.3.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:42961609ac07c232a427da7c87a468d3c82fee6762c220f38e37cfdacb2b178d", size = 5151135, upload-time = "2026-02-18T16:50:24.82Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/49/5309473b9803b207682095201d8708bbc7842ddf3f192488a69204e36455/psycopg_binary-3.3.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae07a3114313dd91fce686cab2f4c44af094398519af0e0f854bc707e1aeedf1", size = 6737315, upload-time = "2026-02-18T16:50:35.106Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d4/5d/03abe74ef34d460b33c4d9662bf6ec1dd38888324323c1a1752133c10377/psycopg_binary-3.3.3-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d257c58d7b36a621dcce1d01476ad8b60f12d80eb1406aee4cf796f88b2ae482", size = 4979783, upload-time = "2026-02-18T16:50:42.067Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f0/6c/3fbf8e604e15f2f3752900434046c00c90bb8764305a1b81112bff30ba24/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:07c7211f9327d522c9c47560cae00a4ecf6687f4e02d779d035dd3177b41cb12", size = 4509023, upload-time = "2026-02-18T16:50:50.116Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9c/6b/1a06b43b7c7af756c80b67eac8bfaa51d77e68635a8a8d246e4f0bb7604a/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:8e7e9eca9b363dbedeceeadd8be97149d2499081f3c52d141d7cd1f395a91f83", size = 4185874, upload-time = "2026-02-18T16:50:55.97Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2b/d3/bf49e3dcaadba510170c8d111e5e69e5ae3f981c1554c5bb71c75ce354bb/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:cb85b1d5702877c16f28d7b92ba030c1f49ebcc9b87d03d8c10bf45a2f1c7508", size = 3925668, upload-time = "2026-02-18T16:51:03.299Z" }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f8/92/0aac830ed6a944fe334404e1687a074e4215630725753f0e3e9a9a595b62/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4d4606c84d04b80f9138d72f1e28c6c02dc5ae0c7b8f3f8aaf89c681ce1cd1b1", size = 4234973, upload-time = "2026-02-18T16:51:09.097Z" }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/2e/96/102244653ee5a143ece5afe33f00f52fe64e389dfce8dbc87580c6d70d3d/psycopg_binary-3.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:74eae563166ebf74e8d950ff359be037b85723d99ca83f57d9b244a871d6c13b", size = 3551342, upload-time = "2026-02-18T16:51:13.892Z" }, +] + [[package]] name = "psycopg-pool" version = "3.3.0" @@ -4199,6 +4337,18 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7f/d0/578c47dd68152ddddddf31cd7fc67dc30b7cdf639a86275fda821b0d9d98/ruff-0.15.6-py3-none-win_arm64.whl", hash = "sha256:c34de3dd0b0ba203be50ae70f5910b17188556630e2178fd7d79fc030eb0d837", size = 11060497, upload-time = "2026-03-12T23:05:25.968Z" }, ] +[[package]] +name = "s3transfer" +version = "0.14.0" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +dependencies = [ + { name = "botocore" }, +] +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/62/74/8d69dcb7a9efe8baa2046891735e5dfe433ad558ae23d9e3c14c633d1d58/s3transfer-0.14.0.tar.gz", hash = "sha256:eff12264e7c8b4985074ccce27a3b38a485bb7f7422cc8046fee9be4983e4125", size = 151547, upload-time = "2025-09-09T19:23:31.089Z" } +wheels = [ + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/48/f0/ae7ca09223a81a1d890b2557186ea015f6e0502e9b8cb8e1813f1d8cfa4e/s3transfer-0.14.0-py3-none-any.whl", hash = "sha256:ea3b790c7077558ed1f02a3072fb3cb992bbbd253392f4b6e9e8976941c7d456", size = 85712, upload-time = "2025-09-09T19:23:30.041Z" }, +] + [[package]] name = "safetensors" version = "0.7.0" @@ -4366,6 +4516,15 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, ] +[[package]] +name = "socksio" +version = "1.0.0" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f8/5c/48a7d9495be3d1c651198fd99dbb6ce190e2274d0f28b9051307bdec6b85/socksio-1.0.0.tar.gz", hash = "sha256:f88beb3da5b5c38b9890469de67d0cb0f9d494b78b106ca1845f96c10b91c4ac", size = 19055, upload-time = "2020-04-17T15:50:34.664Z" } +wheels = [ + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/37/c3/6eeb6034408dac0fa653d126c9204ade96b819c936e136c5e8a6897eee9c/socksio-1.0.0-py3-none-any.whl", hash = "sha256:95dc1f15f9b34e8d7b16f06d74b8ccf48f609af32ab33c608d08761c5dcbb1f3", size = 12763, upload-time = "2020-04-17T15:50:31.878Z" }, +] + [[package]] name = "soupsieve" version = "2.8.3" @@ -5172,6 +5331,21 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f8/ba/d69adbe699b768f6b29a5eec7b47dd610bd17a69de51b251126a801369ea/uvloop-0.22.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1f38ec5e3f18c8a10ded09742f7fb8de0108796eb673f30ce7762ce1b8550cad", size = 4239051, upload-time = "2025-10-16T22:16:43.224Z" }, ] +[[package]] +name = "volcengine-python-sdk" +version = "5.0.19" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +dependencies = [ + { name = "certifi" }, + { name = "python-dateutil" }, + { name = "six" }, + { name = "urllib3" }, +] +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ea/a3/ebd474d14d7b63d9e3c14fc7afa8c8fb816d0d1e295e69f0f2c3a7799c08/volcengine_python_sdk-5.0.19.tar.gz", hash = "sha256:4154e5d1d13603d146c3b972d82433287c8b9e7f8a5f94210280e0ef2875e659", size = 8277541, upload-time = "2026-03-20T09:44:50.835Z" } +wheels = [ + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/34/84/dd6713358185122ada9341ce44d57cb4c7fedf8a4374334cb6ff7d60b74a/volcengine_python_sdk-5.0.19-py2.py3-none-any.whl", hash = "sha256:95cd4e473d9f59423ee1cbcbe37e0f543448f133a3c1c540d2c6eaf83b526ed2", size = 32545589, upload-time = "2026-03-20T09:44:42.391Z" }, +] + [[package]] name = "wasabi" version = "1.1.3" @@ -5490,6 +5664,8 @@ name = "yuxi-know" version = "0.5.3" source = { virtual = "." } dependencies = [ + { name = "agent-sandbox" }, + { name = "aioboto3" }, { name = "aiofiles" }, { name = "aiohttp" }, { name = "aiosqlite" }, @@ -5529,6 +5705,7 @@ dependencies = [ { name = "openai" }, { name = "opencv-python-headless" }, { name = "pillow" }, + { name = "psycopg", extra = ["binary", "pool"] }, { name = "pyjwt" }, { name = "pymilvus" }, { name = "pymupdf" }, @@ -5556,6 +5733,7 @@ dependencies = [ { name = "typer" }, { name = "unstructured" }, { name = "uvicorn", extra = ["standard"] }, + { name = "wcmatch" }, ] [package.dev-dependencies] @@ -5571,6 +5749,8 @@ test = [ [package.metadata] requires-dist = [ + { name = "agent-sandbox", specifier = ">=0.0.26" }, + { name = "aioboto3", specifier = ">=13.0.0" }, { name = "aiofiles", specifier = ">=24.1.0" }, { name = "aiohttp", specifier = ">=3.9.0" }, { name = "aiosqlite", specifier = ">=0.20.0" }, @@ -5610,6 +5790,7 @@ requires-dist = [ { name = "openai", specifier = ">=1.109" }, { name = "opencv-python-headless", specifier = ">=4.11.0.86" }, { name = "pillow", specifier = ">=10.5.0" }, + { name = "psycopg", extras = ["binary", "pool"], specifier = ">=3.3.3" }, { name = "pyjwt", specifier = ">=2.8.0" }, { name = "pymilvus", specifier = ">=2.5.8" }, { name = "pymupdf", specifier = ">=1.25.5" }, @@ -5635,6 +5816,7 @@ requires-dist = [ { name = "typer", specifier = ">=0.16.0" }, { name = "unstructured", specifier = ">=0.17.2" }, { name = "uvicorn", extras = ["standard"], specifier = ">=0.34.2" }, + { name = "wcmatch", specifier = ">=8.0.0" }, ] [package.metadata.requires-dev] diff --git a/web/src/apis/agent_api.js b/web/src/apis/agent_api.js index 57f1bcb97..2301b3ade 100644 --- a/web/src/apis/agent_api.js +++ b/web/src/apis/agent_api.js @@ -174,7 +174,7 @@ export const agentApi = { /** * 恢复被人工审批中断的对话(流式响应) * @param {string} agentId - 智能体ID - * @param {Object} data - 恢复数据 { thread_id, answer: { question_id: answer }, approved } + * @param {Object} data - 恢复数据 { thread_id, answer: { question_id: answer }, approved } * @param {Object} options - 可选参数(signal, headers等) * @returns {Promise} - 恢复响应流 */ @@ -343,6 +343,57 @@ export const threadApi = { */ getThreadAttachments: (threadId) => apiGet(`/api/chat/thread/${threadId}/attachments`), + /** + * 列出线程文件(目录) + * @param {string} threadId + * @param {string} path + * @param {boolean} recursive + * @returns {Promise} + */ + listThreadFiles: (threadId, path = '/mnt/user-data', recursive = false) => + apiGet( + `/api/chat/thread/${threadId}/files?path=${encodeURIComponent(path)}&recursive=${recursive}` + ), + + /** + * 读取线程文本文件内容(分页) + * @param {string} threadId + * @param {string} path + * @param {number} offset + * @param {number} limit + * @returns {Promise} + */ + readThreadFile: (threadId, path, offset = 0, limit = 2000) => + apiGet( + `/api/chat/thread/${threadId}/files/content?path=${encodeURIComponent(path)}&offset=${offset}&limit=${limit}` + ), + + /** + * 获取线程文件下载/预览 URL + * @param {string} threadId + * @param {string} path + * @param {boolean} download + * @returns {string} + */ + getThreadArtifactUrl: (threadId, path, download = false) => { + 
const encodedPath = path + .split('/') + .filter(Boolean) + .map((segment) => encodeURIComponent(segment)) + .join('/') + const query = download ? '?download=true' : '' + return `/api/chat/thread/${threadId}/artifacts/${encodedPath}${query}` + }, + + /** + * 下载线程文件(带鉴权) + * @param {string} threadId + * @param {string} path + * @returns {Promise} + */ + downloadThreadArtifact: (threadId, path) => + apiGet(threadApi.getThreadArtifactUrl(threadId, path, true), {}, true, 'blob'), + /** * 上传附件 * @param {string} threadId diff --git a/web/src/components/AgentChatComponent.vue b/web/src/components/AgentChatComponent.vue index af3308257..cb046afe3 100644 --- a/web/src/components/AgentChatComponent.vue +++ b/web/src/components/AgentChatComponent.vue @@ -202,6 +202,7 @@ { const currentAgentState = computed(() => { return currentChatId.value ? getThreadState(currentChatId.value)?.agentState || null : null }) - -const countFiles = (files) => { - if (!files) return 0 - if (Array.isArray(files)) { - return files.reduce( - (c, item) => c + (item && typeof item === 'object' ? Object.keys(item).length : 0), - 0 - ) - } - return typeof files === 'object' ? Object.keys(files).length : 0 -} +const currentThreadFiles = computed(() => { + if (!currentChatId.value) return [] + return threadFilesMap.value[currentChatId.value] || [] +}) +const currentThreadAttachments = computed(() => { + if (!currentChatId.value) return [] + return threadAttachmentsMap.value[currentChatId.value] || [] +}) const hasAgentStateContent = computed(() => { const s = currentAgentState.value - if (!s) return false - const todoCount = Array.isArray(s.todos) ? s.todos.length : 0 - const fileCount = countFiles(s.files) + if (!s && currentThreadFiles.value.length === 0) return false + const todoCount = Array.isArray(s?.todos) ? 
s.todos.length : 0 + const fileCount = currentThreadFiles.value.filter((item) => item?.is_dir !== true).length return todoCount > 0 || fileCount > 0 }) @@ -420,31 +420,32 @@ watch(hasAgentStateContent, (newVal, oldVal) => { }) const mentionConfig = computed(() => { - const rawFiles = currentAgentState.value?.files || {} - const files = [] - - // 处理 files - 兼容字典格式 {"/path/file": {content: [...]}} 和旧数组格式 - if (typeof rawFiles === 'object' && !Array.isArray(rawFiles) && rawFiles !== null) { - // 新格式:字典格式 {"/attachments/xxx/file.md": {...}} - Object.entries(rawFiles).forEach(([filePath, fileData]) => { - files.push({ - path: filePath, - ...fileData + const fileMap = new Map() + currentThreadFiles.value + .filter((item) => item && item.is_dir !== true && typeof item.path === 'string') + .forEach((item) => { + fileMap.set(item.path, { + path: item.path, + size: item.size, + modified_at: item.modified_at, + artifact_url: item.artifact_url }) }) - } else if (Array.isArray(rawFiles)) { - // 旧格式:数组格式 - rawFiles.forEach((item) => { - if (typeof item === 'object' && item !== null) { - Object.entries(item).forEach(([filePath, fileData]) => { - files.push({ - path: filePath, - ...fileData - }) + + currentThreadAttachments.value.forEach((item) => { + const candidates = [[item?.path, item?.artifact_url]] + candidates.forEach(([path, artifactUrl]) => { + if (typeof path !== 'string' || !path) return + if (!fileMap.has(path)) { + fileMap.set(path, { + path, + artifact_url: artifactUrl || null }) } }) - } + }) + + const files = Array.from(fileMap.values()) // Filter KBs and MCPs based on agent config const configItems = configurableItems.value || {} @@ -626,6 +627,8 @@ const cleanupThreadState = (threadId) => { } delete chatState.threadStates[threadId] } + delete threadFilesMap.value[threadId] + delete threadAttachmentsMap.value[threadId] } // ==================== STREAM HANDLING LOGIC ==================== @@ -717,6 +720,8 @@ const createThread = async (agentId, title = '新的对话') => { if (thread) { threads.value.unshift(thread) threadMessages.value[thread.id] = [] + threadFilesMap.value[thread.id] = [] + threadAttachmentsMap.value[thread.id] = [] } return thread } catch (error) { @@ -737,6 +742,8 @@ const deleteThread = async (threadId) => { await threadApi.deleteThread(threadId) threads.value = threads.value.filter((thread) => thread.id !== threadId) delete threadMessages.value[threadId] + delete threadFilesMap.value[threadId] + delete threadAttachmentsMap.value[threadId] if (chatState.currentThreadId === threadId) { chatState.currentThreadId = null @@ -809,6 +816,36 @@ const fetchThreadMessages = async ({ agentId, threadId, delay = 0 }) => { } } +const fetchThreadFiles = async (threadId) => { + if (!threadId) return + try { + const response = await threadApi.listThreadFiles(threadId, '/mnt/user-data', true) + const entries = Array.isArray(response?.files) ? response.files : [] + threadFilesMap.value[threadId] = entries + } catch (error) { + console.warn('Failed to fetch thread files:', error) + threadFilesMap.value[threadId] = [] + } +} + +const fetchThreadAttachments = async (threadId) => { + if (!threadId) return + try { + const response = await threadApi.getThreadAttachments(threadId) + threadAttachmentsMap.value[threadId] = Array.isArray(response?.attachments) + ? 
response.attachments + : [] + } catch (error) { + console.warn('Failed to fetch thread attachments:', error) + threadAttachmentsMap.value[threadId] = [] + } +} + +const refreshThreadFilesAndAttachments = async (threadId) => { + if (!threadId) return + await Promise.all([fetchThreadFiles(threadId), fetchThreadAttachments(threadId)]) +} + const fetchAgentState = async (agentId, threadId) => { if (!agentId || !threadId) return try { @@ -1164,8 +1201,8 @@ const startRunStream = async (threadId, runId, afterSeq = '0') => { } const approvalStatuses = ['ask_user_question_required', 'human_approval_required'] - const isApprovalEvent = approvalStatuses.includes(event) || - approvalStatuses.includes(payload?.chunk?.status) + const isApprovalEvent = + approvalStatuses.includes(event) || approvalStatuses.includes(payload?.chunk?.status) if (isApprovalEvent) { const approvalChunk = payload?.chunk || { status: event, thread_id: threadId } @@ -1181,7 +1218,7 @@ const startRunStream = async (threadId, runId, afterSeq = '0') => { clearActiveRunSnapshot(threadId) fetchThreadMessages({ agentId: currentAgentId.value, threadId, delay: 200 }).finally( () => { - fetchAgentState(currentAgentId.value, threadId) + handleAgentStateRefresh(threadId) } ) } else if (ts.activeRunId === runId) { @@ -1208,7 +1245,7 @@ const startRunStream = async (threadId, runId, afterSeq = '0') => { clearActiveRunSnapshot(threadId) fetchThreadMessages({ agentId: currentAgentId.value, threadId, delay: 300 }).finally(() => { resetOnGoingConv(threadId) - fetchAgentState(currentAgentId.value, threadId) + handleAgentStateRefresh(threadId) scrollController.scrollToBottom() }) } @@ -1459,7 +1496,8 @@ const selectChat = async (chatId) => { await nextTick() scrollController.scrollToBottomStaticForce() - await fetchAgentState(targetAgentId, chatId) + // await fetchAgentState(targetAgentId, chatId) + await handleAgentStateRefresh(chatId) await resumeActiveRunForThread(chatId) } @@ -1607,7 +1645,7 @@ const handleSendMessage = async ({ image } = {}) => { fetchThreadMessages({ agentId: currentAgentId.value, threadId: threadId }).finally(() => { // 历史记录加载完成后,安全地清空当前进行中的对话 resetOnGoingConv(threadId) - fetchAgentState(currentAgentId.value, threadId) + handleAgentStateRefresh(threadId) scrollController.scrollToBottom() }) } @@ -1737,7 +1775,10 @@ const handleAgentStateRefresh = async (threadId = null) => { if (!currentAgentId.value) return const chatId = threadId || currentChatId.value if (!chatId) return - await fetchAgentState(currentAgentId.value, chatId) + await Promise.all([ + fetchAgentState(currentAgentId.value, chatId), + refreshThreadFilesAndAttachments(chatId) + ]) } const toggleAgentPanel = () => { @@ -1823,6 +1864,8 @@ const loadChatsList = async () => { console.warn('No agent selected, cannot load chats list') threads.value = [] chatState.currentThreadId = null + threadFilesMap.value = {} + threadAttachmentsMap.value = {} return } @@ -1876,6 +1919,8 @@ watch( // 清理当前线程状态 chatState.currentThreadId = null threadMessages.value = {} + threadFilesMap.value = {} + threadAttachmentsMap.value = {} // 清理所有线程状态 resetOnGoingConv() diff --git a/web/src/components/AgentPanel.vue b/web/src/components/AgentPanel.vue index 07f27d3f2..661332f7a 100644 --- a/web/src/components/AgentPanel.vue +++ b/web/src/components/AgentPanel.vue @@ -138,7 +138,7 @@