def _coerce_int_option(value: object, default: int) -> int:
    """Return ``value`` as an ``int``, or ``default`` when it is not a usable integer.

    Args:
        value: Raw option value read from the configuration mapping.
        default: Value to fall back to when ``value`` cannot be used.

    Returns:
        The integer to use for the option (not yet range-clamped).
    """
    # ``bool`` is a subclass of ``int``; a stray ``True`` must not be read as 1.
    if isinstance(value, bool):
        return default
    if isinstance(value, int):
        return value
    # Integral floats (e.g. 1024.0) carry an unambiguous integer value, so keep
    # them instead of silently replacing the user's setting with the default.
    if isinstance(value, float) and value.is_integer():
        return int(value)
    return default


def _get_image_compress_args(
    provider_settings: dict[str, object] | None,
) -> tuple[bool, int, int]:
    """Extract validated image-compression arguments from provider settings.

    Reads ``image_compress_enabled`` and ``image_compress_options`` (keys
    ``max_size`` and ``quality``) from ``provider_settings``. Any missing or
    wrongly typed value falls back to its default.

    Args:
        provider_settings: Provider settings mapping, or ``None``.

    Returns:
        A ``(enabled, max_size, quality)`` tuple where ``max_size`` is at
        least 1 and ``quality`` is clamped to the range 1-100.
    """
    if not isinstance(provider_settings, dict):
        return True, IMAGE_COMPRESS_DEFAULT_MAX_SIZE, IMAGE_COMPRESS_DEFAULT_QUALITY

    enabled = provider_settings.get("image_compress_enabled", True)
    if not isinstance(enabled, bool):
        enabled = True

    raw_options = provider_settings.get("image_compress_options", {})
    options = raw_options if isinstance(raw_options, dict) else {}

    max_size = _coerce_int_option(
        options.get("max_size"),
        IMAGE_COMPRESS_DEFAULT_MAX_SIZE,
    )
    max_size = max(max_size, 1)

    quality = _coerce_int_option(
        options.get("quality"),
        IMAGE_COMPRESS_DEFAULT_QUALITY,
    )
    quality = min(max(quality, 1), 100)

    return enabled, max_size, quality


async def _compress_image_for_provider(
    url_or_path: str,
    provider_settings: dict[str, object] | None,
) -> str:
    """Compress an image before it is handed to a provider, best effort.

    Args:
        url_or_path: Image location passed through to ``compress_image``.
        provider_settings: Provider settings used to resolve the compression
            arguments, or ``None`` to use the defaults.

    Returns:
        The value returned by ``compress_image``, or ``url_or_path`` unchanged
        when compression is disabled or any error occurs.
    """
    try:
        enabled, max_size, quality = _get_image_compress_args(provider_settings)
        if not enabled:
            return url_or_path
        return await compress_image(url_or_path, max_size=max_size, quality=quality)
    except Exception as exc:  # noqa: BLE001
        # Compression is an optimisation only: never let it block the request.
        logger.error("Image compression failed: %s", exc)
        return url_or_path
await comp.convert_to_file_path() + path = await comp.convert_to_file_path() + image_path = await _compress_image_for_provider( + path, + config.provider_settings, + ) req.image_urls.append(image_path) req.extra_user_content_parts.append( TextPart(text=f"[Image Attachment: path {image_path}]") @@ -963,7 +1026,11 @@ async def build_main_agent( for reply_comp in comp.chain: if isinstance(reply_comp, Image): has_embedded_image = True - image_path = await reply_comp.convert_to_file_path() + path = await reply_comp.convert_to_file_path() + image_path = await _compress_image_for_provider( + path, + config.provider_settings, + ) req.image_urls.append(image_path) _append_quoted_image_attachment(req, image_path) elif isinstance(reply_comp, File): diff --git a/astrbot/core/computer/tools/shell.py b/astrbot/core/computer/tools/shell.py index 251a67f361..d9fb25e7dc 100644 --- a/astrbot/core/computer/tools/shell.py +++ b/astrbot/core/computer/tools/shell.py @@ -62,7 +62,9 @@ async def call( umo=context.context.event.unified_msg_origin ) try: - timeout = int(config.get("provider_settings", {}).get("tool_call_timeout", 30)) + timeout = int( + config.get("provider_settings", {}).get("tool_call_timeout", 30) + ) except (ValueError, TypeError): timeout = 30 result = await sb.shell.exec( diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py index d7d0020f1e..74355f487c 100644 --- a/astrbot/core/config/default.py +++ b/astrbot/core/config/default.py @@ -147,6 +147,11 @@ "shipyard_neo_profile": "python-default", "shipyard_neo_ttl": 3600, }, + "image_compress_enabled": True, + "image_compress_options": { + "max_size": 1024, + "quality": 95, + }, }, # SubAgent orchestrator mode: # - main_enable = False: disabled; main LLM mounts tools normally (persona selection). 
@@ -3328,6 +3333,29 @@ class ChatProviderTemplate(TypedDict): "type": "string", "hint": "可使用 {{prompt}} 作为用户输入的占位符。如果不输入占位符则代表添加在用户输入的前面。", }, + "provider_settings.image_compress_enabled": { + "description": "启用图片压缩", + "type": "bool", + "hint": "启用后,发送给多模态模型前会先压缩本地大图片。仅对 chat_completion 提供商生效。", + }, + "provider_settings.image_compress_options.max_size": { + "description": "最大边长", + "type": "int", + "hint": "压缩后图片的最长边,单位为像素。超过该尺寸时会按比例缩放。", + "condition": { + "provider_settings.image_compress_enabled": True, + }, + "slider": {"min": 256, "max": 4096, "step": 64}, + }, + "provider_settings.image_compress_options.quality": { + "description": "压缩质量", + "type": "int", + "hint": "JPEG 输出质量,范围为 1-100。值越高,画质越好,文件也越大。", + "condition": { + "provider_settings.image_compress_enabled": True, + }, + "slider": {"min": 1, "max": 100, "step": 1}, + }, "provider_tts_settings.dual_output": { "description": "开启 TTS 时同时输出语音和文字内容", "type": "bool", diff --git a/astrbot/core/utils/media_utils.py b/astrbot/core/utils/media_utils.py index 8d833514fb..d3f3cc75d3 100644 --- a/astrbot/core/utils/media_utils.py +++ b/astrbot/core/utils/media_utils.py @@ -4,14 +4,23 @@ """ import asyncio +import base64 +import io import os import subprocess import uuid from pathlib import Path +from PIL import Image as PILImage + from astrbot import logger from astrbot.core.utils.astrbot_path import get_astrbot_temp_path +IMAGE_COMPRESS_DEFAULT_MAX_SIZE = 1280 +IMAGE_COMPRESS_DEFAULT_QUALITY = 95 +IMAGE_COMPRESS_DEFAULT_OPTIMIZE = True +IMAGE_COMPRESS_DEFAULT_MIN_FILE_SIZE_MB = 1.0 + async def get_media_duration(file_path: str) -> int | None: """使用ffprobe获取媒体文件时长 @@ -316,3 +325,88 @@ async def extract_video_cover( return output_path except FileNotFoundError: raise Exception("ffmpeg not found") + + +def _compress_image_sync( + data: bytes, + temp_dir: Path, + max_size: int, + quality: int, + optimize: bool, +) -> str: + """Run image compression synchronously via ``asyncio.to_thread``.""" + with 
PILImage.open(io.BytesIO(data)) as opened_img: + img = opened_img + converted_img: PILImage.Image | None = None + + try: + if img.mode != "RGB": + converted_img = img.convert("RGB") + img = converted_img + + if max(img.size) > max_size: + img.thumbnail((max_size, max_size), PILImage.Resampling.LANCZOS) + + new_uuid = uuid.uuid4().hex + save_path = temp_dir / f"compressed_{new_uuid}.jpg" + img.save(save_path, "JPEG", quality=quality, optimize=optimize) + logger.debug(f"Image compressed successfully: {save_path}") + return str(save_path) + finally: + if converted_img is not None: + converted_img.close() + + +async def compress_image( + url_or_path: str, + max_size: int = IMAGE_COMPRESS_DEFAULT_MAX_SIZE, + quality: int = IMAGE_COMPRESS_DEFAULT_QUALITY, +) -> str: + """Compress large user-uploaded images. + + Args: + url_or_path: Image path or URL. + max_size: Longest edge of the compressed image in pixels. + quality: JPEG output quality in the range 1-100. + + Returns: + The compressed image path. Returns the original path if compression + fails or the source does not need compression. + """ + max_size = max(int(max_size), 1) + quality = min(max(int(quality), 1), 100) + optimize = IMAGE_COMPRESS_DEFAULT_OPTIMIZE + min_file_size_bytes = int(IMAGE_COMPRESS_DEFAULT_MIN_FILE_SIZE_MB * 1024 * 1024) + data = None + # Skip compression for remote images and return the original value. 
+ if url_or_path.startswith("http"): + return url_or_path + elif url_or_path.startswith("data:image"): + _header, encoded = url_or_path.split(",", 1) + data = base64.b64decode(encoded) + if len(data) < min_file_size_bytes: + return url_or_path + else: + local_path = Path(url_or_path) + if not local_path.exists(): + return url_or_path + if local_path.stat().st_size < min_file_size_bytes: + return url_or_path + with local_path.open("rb") as f: + data = f.read() + + if not data: + return url_or_path + + temp_dir = Path(get_astrbot_temp_path()) + temp_dir.mkdir(parents=True, exist_ok=True) + + # Offload the blocking image processing task to a thread. + return await asyncio.to_thread( + _compress_image_sync, + data, + temp_dir, + max_size, + quality, + optimize, + ) diff --git a/dashboard/src/i18n/locales/en-US/features/config-metadata.json b/dashboard/src/i18n/locales/en-US/features/config-metadata.json index 5688b4e45a..32f6b2733e 100644 --- a/dashboard/src/i18n/locales/en-US/features/config-metadata.json +++ b/dashboard/src/i18n/locales/en-US/features/config-metadata.json @@ -335,6 +335,22 @@ "description": "User Prompt", "hint": "You can use {{prompt}} as a placeholder for user input. If no placeholder is provided, it will be added before the user input." }, + "image_compress_enabled": { + "description": "Enable image compression", + "hint": "When enabled, large local images are compressed before being sent to multimodal models. Applies only to chat_completion providers." + }, + "image_compress_options": { + "description": "Image compression settings", + "hint": "Control image resize limits, JPEG quality, and the minimum size threshold for compression.", + "max_size": { + "description": "Maximum edge length", + "hint": "Longest edge of the compressed image in pixels. Images larger than this are resized proportionally." + }, + "quality": { + "description": "JPEG quality", + "hint": "JPEG output quality from 1 to 100. 
Higher values preserve more detail but produce larger files." + } + }, "reachability_check": { "description": "Provider Reachability Check", "hint": "When running the /provider command, test provider connectivity in parallel. This actively pings models and may consume extra tokens." @@ -1523,4 +1539,4 @@ "helpMiddle": "or", "helpSuffix": "." } -} \ No newline at end of file +} diff --git a/dashboard/src/i18n/locales/ru-RU/features/config-metadata.json b/dashboard/src/i18n/locales/ru-RU/features/config-metadata.json index 56d12c9838..1a6961e8db 100644 --- a/dashboard/src/i18n/locales/ru-RU/features/config-metadata.json +++ b/dashboard/src/i18n/locales/ru-RU/features/config-metadata.json @@ -335,6 +335,22 @@ "description": "Промпт пользователя", "hint": "Вы можете использовать {{prompt}} как заполнитель для ввода. Если заполнитель не указан, он будет добавлен перед текстом пользователя." }, + "image_compress_enabled": { + "description": "Включить сжатие изображений", + "hint": "Когда включено, большие локальные изображения сжимаются перед отправкой в мультимодальные модели. Применяется только к провайдерам chat_completion." + }, + "image_compress_options": { + "description": "Настройки сжатия изображений", + "hint": "Управляет ограничением размера, качеством JPEG и минимальным порогом размера для сжатия.", + "max_size": { + "description": "Максимальная длина стороны", + "hint": "Максимальная длина стороны сжатого изображения в пикселях. Более крупные изображения пропорционально уменьшаются." + }, + "quality": { + "description": "Качество JPEG", + "hint": "Качество JPEG от 1 до 100. Более высокие значения сохраняют больше деталей, но увеличивают размер файла." + } + }, "reachability_check": { "description": "Проверка доступности провайдеров", "hint": "При выполнении команды /provider проверяет связь со всеми моделями. Это может расходовать токены." 
diff --git a/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json b/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json index be52995189..83a9cbb4bb 100644 --- a/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json +++ b/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json @@ -337,6 +337,22 @@ "description": "用户提示词", "hint": "可使用 {{prompt}} 作为用户输入的占位符。如果不输入占位符则代表添加在用户输入的前面。" }, + "image_compress_enabled": { + "description": "启用图片压缩", + "hint": "启用后,发送给多模态模型前会先压缩本地大图片。仅对 chat_completion 提供商生效。" + }, + "image_compress_options": { + "description": "图片压缩配置", + "hint": "用于控制图片压缩的尺寸、质量和触发阈值。", + "max_size": { + "description": "最大边长", + "hint": "压缩后图片的最长边,单位为像素。超过该尺寸时会按比例缩放。" + }, + "quality": { + "description": "JPEG 质量", + "hint": "JPEG 输出质量,范围为 1-100。值越高,画质越好,文件也越大。" + } + }, "reachability_check": { "description": "提供商可达性检测", "hint": "/provider 命令列出模型时并发检测连通性。开启后会主动调用模型测试连通性,可能产生额外 token 消耗。"