AstrBotDevs · Soulter · Mar 22, 2026 · Mar 16, 2026 · Mar 16, 2026 · Mar 16, 2026
diff --git a/astrbot/core/astr_main_agent.py b/astrbot/core/astr_main_agent.py
@@ -50,6 +50,11 @@
 from astrbot.core.tools.send_message import SEND_MESSAGE_TO_USER_TOOL
 from astrbot.core.utils.file_extract import extract_file_moonshotai
 from astrbot.core.utils.llm_metadata import LLM_METADATAS
+from astrbot.core.utils.media_utils import (
+    IMAGE_COMPRESS_DEFAULT_MAX_SIZE,
+    IMAGE_COMPRESS_DEFAULT_QUALITY,
+    compress_image,
+)
 from astrbot.core.utils.quoted_message.settings import (
     SETTINGS as DEFAULT_QUOTED_MESSAGE_SETTINGS,
 )
@@ -445,10 +450,14 @@ async def _ensure_img_caption(
     image_caption_provider: str,
 ) -> None:
     try:
+        compressed_urls = []
+        for url in req.image_urls:
+            compressed_url = await _compress_image_for_provider(url, cfg)
+            compressed_urls.append(compressed_url)
         caption = await _request_img_caption(
             image_caption_provider,
             cfg,
-            req.image_urls,
+            compressed_urls,
             plugin_context,
         )
         if caption:
@@ -458,6 +467,9 @@ async def _ensure_img_caption(
             req.image_urls = []
     except Exception as exc:  # noqa: BLE001
         logger.error("处理图片描述失败: %s", exc)
+        req.extra_user_content_parts.append(TextPart(text="[Image Captioning Failed]"))
+    finally:
+        req.image_urls = []
 
 
 def _append_quoted_image_attachment(req: ProviderRequest, image_path: str) -> None:
@@ -477,12 +489,53 @@ def _get_quoted_message_parser_settings(
     return DEFAULT_QUOTED_MESSAGE_SETTINGS.with_overrides(overrides)
 
 
+def _get_image_compress_args(
+    provider_settings: dict[str, object] | None,
+) -> tuple[bool, int, int]:
+    """Resolve image-compression settings from ``provider_settings``.
+
+    Args:
+        provider_settings: Provider settings mapping. ``None`` or any
+            non-dict value yields the module defaults with compression
+            enabled.
+
+    Returns:
+        An ``(enabled, max_size, quality)`` tuple. Options of the wrong type
+        fall back to the defaults; ``max_size`` read from the mapping is
+        raised to at least 1 and ``quality`` is clamped to 1-100.
+    """
+    if not isinstance(provider_settings, dict):
+        return True, IMAGE_COMPRESS_DEFAULT_MAX_SIZE, IMAGE_COMPRESS_DEFAULT_QUALITY
+
+    enabled = provider_settings.get("image_compress_enabled", True)
+    if not isinstance(enabled, bool):
+        enabled = True
+
+    raw_options = provider_settings.get("image_compress_options", {})
+    options = raw_options if isinstance(raw_options, dict) else {}
+
+    def _int_option(key: str, default: int) -> int:
+        value = options.get(key, default)
+        # bool is a subclass of int, so reject it explicitly; otherwise a
+        # stray ``true`` would be read as a 1-pixel / quality-1 setting.
+        if isinstance(value, bool) or not isinstance(value, int):
+            return default
+        return value
+
+    max_size = max(_int_option("max_size", IMAGE_COMPRESS_DEFAULT_MAX_SIZE), 1)
+    quality = _int_option("quality", IMAGE_COMPRESS_DEFAULT_QUALITY)
+    quality = min(max(quality, 1), 100)
+
+    return enabled, max_size, quality
+
+
+async def _compress_image_for_provider(
+    url_or_path: str,
+    provider_settings: dict[str, object] | None,
+) -> str:
+    """Best-effort image compression before handing an image to a provider.
+
+    Args:
+        url_or_path: Image reference as produced by the caller (path or URL).
+        provider_settings: Settings mapping read for the compression options.
+
+    Returns:
+        Whatever ``compress_image`` returns when compression is enabled;
+        otherwise, or when any exception is raised, ``url_or_path`` unchanged.
+    """
+    result = url_or_path
+    try:
+        enabled, max_size, quality = _get_image_compress_args(provider_settings)
+        if enabled:
+            result = await compress_image(
+                url_or_path,
+                max_size=max_size,
+                quality=quality,
+            )
+    except Exception as exc:  # noqa: BLE001
+        # Compression is an optimisation only; never fail the request for it.
+        logger.error("Image compression failed: %s", exc)
+        return url_or_path
+    return result
+
+
 async def _process_quote_message(
     event: AstrMessageEvent,
     req: ProviderRequest,
     img_cap_prov_id: str,
     plugin_context: Context,
     quoted_message_settings: QuotedMessageParserSettings = DEFAULT_QUOTED_MESSAGE_SETTINGS,
+    config: MainAgentBuildConfig | None = None,
 ) -> None:
     quote = None
     for comp in event.message_obj.message:
@@ -521,9 +574,14 @@ async def _process_quote_message(
                 prov = plugin_context.get_using_provider(event.unified_msg_origin)
 
             if prov and isinstance(prov, Provider):
+                path = await image_seg.convert_to_file_path()
+                image_path = await _compress_image_for_provider(
+                    path,
+                    config.provider_settings if config else None,
+                )
                 llm_resp = await prov.text_chat(
                     prompt=IMAGE_CAPTION_DEFAULT_PROMPT,
-                    image_urls=[await image_seg.convert_to_file_path()],
+                    image_urls=[image_path],
                 )
                 if llm_resp.completion_text:
                     content_parts.append(
@@ -615,6 +673,7 @@ async def _decorate_llm_request(
         img_cap_prov_id,
         plugin_context,
         quoted_message_settings,
+        config,
     )
 
     tz = config.timezone
@@ -936,7 +995,11 @@ async def build_main_agent(
             # media files attachments
             for comp in event.message_obj.message:
                 if isinstance(comp, Image):
-                    image_path = await comp.convert_to_file_path()
+                    path = await comp.convert_to_file_path()
+                    image_path = await _compress_image_for_provider(
+                        path,
+                        config.provider_settings,
+                    )
                     req.image_urls.append(image_path)
                     req.extra_user_content_parts.append(
                         TextPart(text=f"[Image Attachment: path {image_path}]")
@@ -963,7 +1026,11 @@ async def build_main_agent(
                     for reply_comp in comp.chain:
                         if isinstance(reply_comp, Image):
                             has_embedded_image = True
-                            image_path = await reply_comp.convert_to_file_path()
+                            path = await reply_comp.convert_to_file_path()
+                            image_path = await _compress_image_for_provider(
+                                path,
+                                config.provider_settings,
+                            )
                             req.image_urls.append(image_path)
                             _append_quoted_image_attachment(req, image_path)
                         elif isinstance(reply_comp, File):

diff --git a/astrbot/core/computer/tools/shell.py b/astrbot/core/computer/tools/shell.py
@@ -62,7 +62,9 @@ async def call(
                 umo=context.context.event.unified_msg_origin
             )
             try:
-                timeout = int(config.get("provider_settings", {}).get("tool_call_timeout", 30))
+                timeout = int(
+                    config.get("provider_settings", {}).get("tool_call_timeout", 30)
+                )
             except (ValueError, TypeError):
                 timeout = 30
             result = await sb.shell.exec(

diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py
@@ -147,6 +147,11 @@
             "shipyard_neo_profile": "python-default",
             "shipyard_neo_ttl": 3600,
         },
+        "image_compress_enabled": True,
+        "image_compress_options": {
+            "max_size": 1024,
+            "quality": 95,
+        },
     },
     # SubAgent orchestrator mode:
     # - main_enable = False: disabled; main LLM mounts tools normally (persona selection).
@@ -3328,6 +3333,29 @@ class ChatProviderTemplate(TypedDict):
                         "type": "string",
                         "hint": "可使用 {{prompt}} 作为用户输入的占位符。如果不输入占位符则代表添加在用户输入的前面。",
                     },
+                    "provider_settings.image_compress_enabled": {
+                        "description": "启用图片压缩",
+                        "type": "bool",
+                        "hint": "启用后，发送给多模态模型前会先压缩本地大图片。仅对 chat_completion 提供商生效。",
+                    },
+                    "provider_settings.image_compress_options.max_size": {
+                        "description": "最大边长",
+                        "type": "int",
+                        "hint": "压缩后图片的最长边，单位为像素。超过该尺寸时会按比例缩放。",
+                        "condition": {
+                            "provider_settings.image_compress_enabled": True,
+                        },
+                        "slider": {"min": 256, "max": 4096, "step": 64},
+                    },
+                    "provider_settings.image_compress_options.quality": {
+                        "description": "压缩质量",
+                        "type": "int",
+                        "hint": "JPEG 输出质量，范围为 1-100。值越高，画质越好，文件也越大。",
+                        "condition": {
+                            "provider_settings.image_compress_enabled": True,
+                        },
+                        "slider": {"min": 1, "max": 100, "step": 1},
+                    },
                     "provider_tts_settings.dual_output": {
                         "description": "开启 TTS 时同时输出语音和文字内容",
                         "type": "bool",

diff --git a/astrbot/core/utils/media_utils.py b/astrbot/core/utils/media_utils.py
@@ -4,14 +4,23 @@
 """
 
 import asyncio
+import base64
+import io
 import os
 import subprocess
 import uuid
 from pathlib import Path
 
+from PIL import Image as PILImage
+
 from astrbot import logger
 from astrbot.core.utils.astrbot_path import get_astrbot_temp_path
 
+# Fallback longest-edge limit in pixels for compress_image().
+# NOTE(review): astrbot/core/config/default.py ships "max_size": 1024 while
+# this fallback is 1280 - confirm which value is intended.
+IMAGE_COMPRESS_DEFAULT_MAX_SIZE = 1280
+# Fallback JPEG quality (compress_image() clamps quality to 1-100).
+IMAGE_COMPRESS_DEFAULT_QUALITY = 95
+# Passed as ``optimize`` to the JPEG save call.
+IMAGE_COMPRESS_DEFAULT_OPTIMIZE = True
+# Inputs smaller than this many MiB are returned without compression.
+IMAGE_COMPRESS_DEFAULT_MIN_FILE_SIZE_MB = 1.0
+
 
 async def get_media_duration(file_path: str) -> int | None:
     """使用ffprobe获取媒体文件时长
@@ -316,3 +325,88 @@ async def extract_video_cover(
         return output_path
     except FileNotFoundError:
         raise Exception("ffmpeg not found")
+
+
+def _compress_image_sync(
+    data: bytes,
+    temp_dir: Path,
+    max_size: int,
+    quality: int,
+    optimize: bool,
+) -> str:
+    """Decode ``data``, downscale it and write it as a JPEG in ``temp_dir``.
+
+    Blocking; intended to be run via ``asyncio.to_thread``.
+
+    Args:
+        data: Raw encoded image bytes.
+        temp_dir: Existing directory that receives the output file.
+        max_size: Longest allowed edge in pixels; larger images are shrunk
+            proportionally.
+        quality: JPEG quality passed to the encoder.
+        optimize: JPEG ``optimize`` flag passed to the encoder.
+
+    Returns:
+        Path of the written ``compressed_<uuid>.jpg`` file as a string.
+    """
+    # Local import: only this function needs ImageOps.
+    from PIL import ImageOps
+
+    with PILImage.open(io.BytesIO(data)) as opened_img:
+        # JPEG output drops the EXIF orientation tag, so bake the rotation
+        # into the pixels first. exif_transpose always returns a new image.
+        img = ImageOps.exif_transpose(opened_img)
+
+        try:
+            has_alpha = img.mode in ("RGBA", "LA") or (
+                img.mode == "P" and "transparency" in img.info
+            )
+            if has_alpha:
+                # JPEG has no alpha channel. Composite onto white instead of
+                # just dropping alpha, which would expose arbitrary (usually
+                # black) pixel values behind transparent areas.
+                rgba = img.convert("RGBA")
+                flattened = PILImage.new("RGB", rgba.size, (255, 255, 255))
+                flattened.paste(rgba, mask=rgba.getchannel("A"))
+                rgba.close()
+                img.close()
+                img = flattened
+            elif img.mode != "RGB":
+                converted = img.convert("RGB")
+                img.close()
+                img = converted
+
+            if max(img.size) > max_size:
+                img.thumbnail((max_size, max_size), PILImage.Resampling.LANCZOS)
+
+            save_path = temp_dir / f"compressed_{uuid.uuid4().hex}.jpg"
+            img.save(save_path, "JPEG", quality=quality, optimize=optimize)
+            logger.debug(f"Image compressed successfully: {save_path}")
+            return str(save_path)
+        finally:
+            img.close()
+
+
+async def compress_image(
+    url_or_path: str,
+    max_size: int = IMAGE_COMPRESS_DEFAULT_MAX_SIZE,
+    quality: int = IMAGE_COMPRESS_DEFAULT_QUALITY,
+) -> str:
+    """Compress a large local or base64-embedded image to a JPEG temp file.
+
+    Args:
+        url_or_path: Local file path, ``data:image`` URI, or HTTP(S) URL.
+        max_size: Longest edge of the compressed image in pixels.
+        quality: JPEG output quality in the range 1-100.
+
+    Returns:
+        Path of the compressed JPEG, or ``url_or_path`` unchanged when the
+        input is an HTTP(S) URL, a missing file, or smaller than the minimum
+        size threshold.
+
+    Raises:
+        Exception: Decoding, I/O and image-processing errors are not caught
+            here; callers that want a fallback must handle them.
+    """
+    max_size = max(int(max_size), 1)
+    quality = min(max(int(quality), 1), 100)
+    optimize = IMAGE_COMPRESS_DEFAULT_OPTIMIZE
+    min_file_size_bytes = int(IMAGE_COMPRESS_DEFAULT_MIN_FILE_SIZE_MB * 1024 * 1024)
+
+    # Remote images are not downloaded here. Match the full scheme so a local
+    # file whose name merely begins with "http" is still processed.
+    if url_or_path.startswith(("http://", "https://")):
+        return url_or_path
+
+    if url_or_path.startswith("data:image"):
+        _header, encoded = url_or_path.split(",", 1)
+        data = base64.b64decode(encoded)
+        if len(data) < min_file_size_bytes:
+            return url_or_path
+    else:
+
+        def _read_if_large() -> bytes | None:
+            local_path = Path(url_or_path)
+            if not local_path.exists():
+                return None
+            if local_path.stat().st_size < min_file_size_bytes:
+                return None
+            return local_path.read_bytes()
+
+        # File-system access is blocking; keep it off the event loop.
+        data = await asyncio.to_thread(_read_if_large)
+
+    if not data:
+        return url_or_path
+
+    temp_dir = Path(get_astrbot_temp_path())
+    temp_dir.mkdir(parents=True, exist_ok=True)
+
+    # Offload the blocking image processing task to a thread.
+    return await asyncio.to_thread(
+        _compress_image_sync,
+        data,
+        temp_dir,
+        max_size,
+        quality,
+        optimize,
+    )
diff --git a/dashboard/src/i18n/locales/en-US/features/config-metadata.json b/dashboard/src/i18n/locales/en-US/features/config-metadata.json
@@ -335,6 +335,22 @@
           "description": "User Prompt",
           "hint": "You can use {{prompt}} as a placeholder for user input. If no placeholder is provided, it will be added before the user input."
         },
+        "image_compress_enabled": {
+          "description": "Enable image compression",
+          "hint": "When enabled, large local images are compressed before being sent to multimodal models. Applies only to chat_completion providers."
+        },
+        "image_compress_options": {
+          "description": "Image compression settings",
+          "hint": "Control image resize limits and JPEG quality.",
+          "max_size": {
+            "description": "Maximum edge length",
+            "hint": "Longest edge of the compressed image in pixels. Images larger than this are resized proportionally."
+          },
+          "quality": {
+            "description": "JPEG quality",
+            "hint": "JPEG output quality from 1 to 100. Higher values preserve more detail but produce larger files."
+          }
+        },
         "reachability_check": {
           "description": "Provider Reachability Check",
           "hint": "When running the /provider command, test provider connectivity in parallel. This actively pings models and may consume extra tokens."
@@ -1523,4 +1539,4 @@
     "helpMiddle": "or",
     "helpSuffix": "."
   }
-}
+}
diff --git a/dashboard/src/i18n/locales/ru-RU/features/config-metadata.json b/dashboard/src/i18n/locales/ru-RU/features/config-metadata.json
@@ -335,6 +335,22 @@
                     "description": "Промпт пользователя",
                     "hint": "Вы можете использовать {{prompt}} как заполнитель для ввода. Если заполнитель не указан, он будет добавлен перед текстом пользователя."
                 },
+                "image_compress_enabled": {
+                    "description": "Включить сжатие изображений",
+                    "hint": "Когда включено, большие локальные изображения сжимаются перед отправкой в мультимодальные модели. Применяется только к провайдерам chat_completion."
+                },
+                "image_compress_options": {
+                    "description": "Настройки сжатия изображений",
+                    "hint": "Управляет ограничением размера и качеством JPEG при сжатии.",
+                    "max_size": {
+                        "description": "Максимальная длина стороны",
+                        "hint": "Максимальная длина стороны сжатого изображения в пикселях. Более крупные изображения пропорционально уменьшаются."
+                    },
+                    "quality": {
+                        "description": "Качество JPEG",
+                        "hint": "Качество JPEG от 1 до 100. Более высокие значения сохраняют больше деталей, но увеличивают размер файла."
+                    }
+                },
                 "reachability_check": {
                     "description": "Проверка доступности провайдеров",
                     "hint": "При выполнении команды /provider проверяет связь со всеми моделями. Это может расходовать токены."

diff --git a/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json b/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json
@@ -337,6 +337,22 @@
           "description": "用户提示词",
           "hint": "可使用 {{prompt}} 作为用户输入的占位符。如果不输入占位符则代表添加在用户输入的前面。"
         },
+        "image_compress_enabled": {
+          "description": "启用图片压缩",
+          "hint": "启用后，发送给多模态模型前会先压缩本地大图片。仅对 chat_completion 提供商生效。"
+        },
+        "image_compress_options": {
+          "description": "图片压缩配置",
+          "hint": "用于控制图片压缩的尺寸和质量。",
+          "max_size": {
+            "description": "最大边长",
+            "hint": "压缩后图片的最长边，单位为像素。超过该尺寸时会按比例缩放。"
+          },
+          "quality": {
+            "description": "JPEG 质量",
+            "hint": "JPEG 输出质量，范围为 1-100。值越高，画质越好，文件也越大。"
+          }
+        },
         "reachability_check": {
           "description": "提供商可达性检测",
           "hint": "/provider 命令列出模型时并发检测连通性。开启后会主动调用模型测试连通性,可能产生额外 token 消耗。"