Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 71 additions & 4 deletions astrbot/core/astr_main_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@
from astrbot.core.tools.send_message import SEND_MESSAGE_TO_USER_TOOL
from astrbot.core.utils.file_extract import extract_file_moonshotai
from astrbot.core.utils.llm_metadata import LLM_METADATAS
from astrbot.core.utils.media_utils import (
IMAGE_COMPRESS_DEFAULT_MAX_SIZE,
IMAGE_COMPRESS_DEFAULT_QUALITY,
compress_image,
)
from astrbot.core.utils.quoted_message.settings import (
SETTINGS as DEFAULT_QUOTED_MESSAGE_SETTINGS,
)
Expand Down Expand Up @@ -445,10 +450,14 @@ async def _ensure_img_caption(
image_caption_provider: str,
) -> None:
try:
compressed_urls = []
for url in req.image_urls:
compressed_url = await _compress_image_for_provider(url, cfg)
compressed_urls.append(compressed_url)
caption = await _request_img_caption(
image_caption_provider,
cfg,
req.image_urls,
compressed_urls,
plugin_context,
)
if caption:
Expand All @@ -458,6 +467,9 @@ async def _ensure_img_caption(
req.image_urls = []
except Exception as exc: # noqa: BLE001
logger.error("处理图片描述失败: %s", exc)
req.extra_user_content_parts.append(TextPart(text="[Image Captioning Failed]"))
finally:
req.image_urls = []
Comment on lines +471 to +472


def _append_quoted_image_attachment(req: ProviderRequest, image_path: str) -> None:
Expand All @@ -477,12 +489,53 @@ def _get_quoted_message_parser_settings(
return DEFAULT_QUOTED_MESSAGE_SETTINGS.with_overrides(overrides)


def _get_image_compress_args(
    provider_settings: dict[str, object] | None,
) -> tuple[bool, int, int]:
    """Resolve image-compression settings from ``provider_settings``.

    Reads ``image_compress_enabled`` and the ``max_size`` / ``quality`` entries
    of ``image_compress_options``. Any missing or wrongly typed value falls
    back to its default, so the returned tuple is always usable.

    Args:
        provider_settings: Provider settings mapping. Anything that is not a
            ``dict`` (including ``None``) selects the defaults.

    Returns:
        ``(enabled, max_size, quality)`` where ``max_size`` is at least 1 and
        ``quality`` is clamped to the range 1-100.
    """
    if not isinstance(provider_settings, dict):
        return True, IMAGE_COMPRESS_DEFAULT_MAX_SIZE, IMAGE_COMPRESS_DEFAULT_QUALITY

    enabled = provider_settings.get("image_compress_enabled", True)
    if not isinstance(enabled, bool):
        enabled = True

    raw_options = provider_settings.get("image_compress_options", {})
    options = raw_options if isinstance(raw_options, dict) else {}

    def _int_option(key: str, default: int) -> int:
        """Return ``options[key]`` if it is a genuine int, else ``default``."""
        value = options.get(key, default)
        # bool is a subclass of int; without this check ``True`` would be
        # accepted and silently interpreted as 1 (a 1-pixel / quality-1 image).
        if isinstance(value, bool) or not isinstance(value, int):
            return default
        return value

    max_size = max(_int_option("max_size", IMAGE_COMPRESS_DEFAULT_MAX_SIZE), 1)
    quality = _int_option("quality", IMAGE_COMPRESS_DEFAULT_QUALITY)
    quality = min(max(quality, 1), 100)

    return enabled, max_size, quality


async def _compress_image_for_provider(
    url_or_path: str,
    provider_settings: dict[str, object] | None,
) -> str:
    """Compress an image according to the provider settings, best effort.

    Args:
        url_or_path: Image reference handed to ``compress_image``.
        provider_settings: Settings mapping from which the compression
            arguments are resolved; may be ``None``.

    Returns:
        Whatever ``compress_image`` returns when compression is enabled and
        succeeds; otherwise ``url_or_path`` unchanged. Errors are logged and
        never propagated to the caller.
    """
    try:
        is_enabled, longest_edge, jpeg_quality = _get_image_compress_args(
            provider_settings
        )
        if is_enabled:
            return await compress_image(
                url_or_path,
                max_size=longest_edge,
                quality=jpeg_quality,
            )
    except Exception as exc:  # noqa: BLE001
        logger.error("Image compression failed: %s", exc)
    # Reached when compression is disabled or when it failed.
    return url_or_path


async def _process_quote_message(
event: AstrMessageEvent,
req: ProviderRequest,
img_cap_prov_id: str,
plugin_context: Context,
quoted_message_settings: QuotedMessageParserSettings = DEFAULT_QUOTED_MESSAGE_SETTINGS,
config: MainAgentBuildConfig | None = None,
) -> None:
quote = None
for comp in event.message_obj.message:
Expand Down Expand Up @@ -521,9 +574,14 @@ async def _process_quote_message(
prov = plugin_context.get_using_provider(event.unified_msg_origin)

if prov and isinstance(prov, Provider):
path = await image_seg.convert_to_file_path()
image_path = await _compress_image_for_provider(
path,
config.provider_settings if config else None,
)
llm_resp = await prov.text_chat(
prompt=IMAGE_CAPTION_DEFAULT_PROMPT,
image_urls=[await image_seg.convert_to_file_path()],
image_urls=[image_path],
)
if llm_resp.completion_text:
content_parts.append(
Expand Down Expand Up @@ -615,6 +673,7 @@ async def _decorate_llm_request(
img_cap_prov_id,
plugin_context,
quoted_message_settings,
config,
)

tz = config.timezone
Expand Down Expand Up @@ -936,7 +995,11 @@ async def build_main_agent(
# media files attachments
for comp in event.message_obj.message:
if isinstance(comp, Image):
image_path = await comp.convert_to_file_path()
path = await comp.convert_to_file_path()
image_path = await _compress_image_for_provider(
path,
config.provider_settings,
)
req.image_urls.append(image_path)
req.extra_user_content_parts.append(
TextPart(text=f"[Image Attachment: path {image_path}]")
Expand All @@ -963,7 +1026,11 @@ async def build_main_agent(
for reply_comp in comp.chain:
if isinstance(reply_comp, Image):
has_embedded_image = True
image_path = await reply_comp.convert_to_file_path()
path = await reply_comp.convert_to_file_path()
image_path = await _compress_image_for_provider(
path,
config.provider_settings,
)
req.image_urls.append(image_path)
_append_quoted_image_attachment(req, image_path)
elif isinstance(reply_comp, File):
Expand Down
4 changes: 3 additions & 1 deletion astrbot/core/computer/tools/shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ async def call(
umo=context.context.event.unified_msg_origin
)
try:
timeout = int(config.get("provider_settings", {}).get("tool_call_timeout", 30))
timeout = int(
config.get("provider_settings", {}).get("tool_call_timeout", 30)
)
except (ValueError, TypeError):
timeout = 30
result = await sb.shell.exec(
Expand Down
28 changes: 28 additions & 0 deletions astrbot/core/config/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,11 @@
"shipyard_neo_profile": "python-default",
"shipyard_neo_ttl": 3600,
},
"image_compress_enabled": True,
"image_compress_options": {
"max_size": 1024,
"quality": 95,
},
},
# SubAgent orchestrator mode:
# - main_enable = False: disabled; main LLM mounts tools normally (persona selection).
Expand Down Expand Up @@ -3328,6 +3333,29 @@ class ChatProviderTemplate(TypedDict):
"type": "string",
"hint": "可使用 {{prompt}} 作为用户输入的占位符。如果不输入占位符则代表添加在用户输入的前面。",
},
"provider_settings.image_compress_enabled": {
"description": "启用图片压缩",
"type": "bool",
"hint": "启用后,发送给多模态模型前会先压缩本地大图片。仅对 chat_completion 提供商生效。",
},
"provider_settings.image_compress_options.max_size": {
"description": "最大边长",
"type": "int",
"hint": "压缩后图片的最长边,单位为像素。超过该尺寸时会按比例缩放。",
"condition": {
"provider_settings.image_compress_enabled": True,
},
"slider": {"min": 256, "max": 4096, "step": 64},
},
"provider_settings.image_compress_options.quality": {
"description": "压缩质量",
"type": "int",
"hint": "JPEG 输出质量,范围为 1-100。值越高,画质越好,文件也越大。",
"condition": {
"provider_settings.image_compress_enabled": True,
},
"slider": {"min": 1, "max": 100, "step": 1},
},
"provider_tts_settings.dual_output": {
"description": "开启 TTS 时同时输出语音和文字内容",
"type": "bool",
Expand Down
94 changes: 94 additions & 0 deletions astrbot/core/utils/media_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,23 @@
"""

import asyncio
import base64
import io
import os
import subprocess
import uuid
from pathlib import Path

from PIL import Image as PILImage

from astrbot import logger
from astrbot.core.utils.astrbot_path import get_astrbot_temp_path

# Default longest edge, in pixels, used by compress_image() for ``max_size``.
# NOTE(review): the shipped provider_settings default for
# image_compress_options.max_size appears to be 1024 - confirm which value
# is intended.
IMAGE_COMPRESS_DEFAULT_MAX_SIZE = 1280
# Default JPEG output quality (1-100) used by compress_image() for ``quality``.
IMAGE_COMPRESS_DEFAULT_QUALITY = 95
# Forwarded as the ``optimize`` flag when the compressed JPEG is saved.
IMAGE_COMPRESS_DEFAULT_OPTIMIZE = True
# Inputs smaller than this many megabytes are returned by compress_image()
# unchanged.
IMAGE_COMPRESS_DEFAULT_MIN_FILE_SIZE_MB = 1.0


async def get_media_duration(file_path: str) -> int | None:
"""使用ffprobe获取媒体文件时长
Expand Down Expand Up @@ -316,3 +325,88 @@ async def extract_video_cover(
return output_path
except FileNotFoundError:
raise Exception("ffmpeg not found")


def _compress_image_sync(
    data: bytes,
    temp_dir: Path,
    max_size: int,
    quality: int,
    optimize: bool,
) -> str:
    """Re-encode raw image bytes as a size-limited JPEG inside ``temp_dir``.

    This function blocks; ``compress_image`` runs it through
    ``asyncio.to_thread``.

    Args:
        data: Encoded image bytes in any format Pillow can open.
        temp_dir: Existing directory that receives the output file.
        max_size: Longest allowed edge in pixels; larger images are scaled
            down proportionally.
        quality: JPEG quality passed to the encoder.
        optimize: JPEG ``optimize`` flag passed to the encoder.

    Returns:
        Path of the newly written ``compressed_<uuid>.jpg`` file.
    """
    # Local import: only this function needs ImageOps.
    from PIL import ImageOps

    with PILImage.open(io.BytesIO(data)) as opened_img:
        # Every image derived from the source is tracked so it gets closed.
        derived: list[PILImage.Image] = []
        try:
            img = opened_img

            # The output is saved without an ``exif`` argument, so an EXIF
            # Orientation tag on the source would be dropped and rotated
            # photos would come out sideways. Bake the rotation into the
            # pixels before resizing.
            try:
                oriented = ImageOps.exif_transpose(img)
            except Exception:  # noqa: BLE001 - malformed EXIF must not block compression
                oriented = None
            if oriented is not None and oriented is not img:
                derived.append(oriented)
                img = oriented

            # JPEG cannot store alpha or palette modes.
            if img.mode != "RGB":
                img = img.convert("RGB")
                derived.append(img)

            if max(img.size) > max_size:
                img.thumbnail((max_size, max_size), PILImage.Resampling.LANCZOS)

            new_uuid = uuid.uuid4().hex
            save_path = temp_dir / f"compressed_{new_uuid}.jpg"
            img.save(save_path, "JPEG", quality=quality, optimize=optimize)
            logger.debug(f"Image compressed successfully: {save_path}")
            return str(save_path)
        finally:
            for image in derived:
                image.close()


def _read_image_file_if_large(path: Path, min_size_bytes: int) -> bytes | None:
    """Return the bytes of ``path`` if it is at least ``min_size_bytes`` long.

    Returns ``None`` when the path cannot be stat'ed (missing file, name too
    long, ...) or when the file is below the threshold.
    """
    try:
        if path.stat().st_size < min_size_bytes:
            return None
    except OSError:
        return None
    return path.read_bytes()


async def compress_image(
    url_or_path: str,
    max_size: int = IMAGE_COMPRESS_DEFAULT_MAX_SIZE,
    quality: int = IMAGE_COMPRESS_DEFAULT_QUALITY,
) -> str:
    """Compress large user-uploaded images.

    Remote (``http...``) references are returned untouched. ``data:image``
    URIs and local files are re-encoded as JPEG only when their payload is at
    least ``IMAGE_COMPRESS_DEFAULT_MIN_FILE_SIZE_MB`` megabytes.

    Args:
        url_or_path: Image path, ``data:image`` URI, or URL.
        max_size: Longest edge of the compressed image in pixels.
        quality: JPEG output quality in the range 1-100.

    Returns:
        The compressed image path. Returns the original value if compression
        fails or the source does not need compression; this function does not
        raise for bad input.
    """
    # Skip compression for remote images and return the original value.
    if url_or_path.startswith("http"):
        return url_or_path

    try:
        max_size = max(int(max_size), 1)
        quality = min(max(int(quality), 1), 100)
        optimize = IMAGE_COMPRESS_DEFAULT_OPTIMIZE
        min_file_size_bytes = int(
            IMAGE_COMPRESS_DEFAULT_MIN_FILE_SIZE_MB * 1024 * 1024
        )

        if url_or_path.startswith("data:image"):
            _header, separator, encoded = url_or_path.partition(",")
            if not separator:
                # Malformed data URI without a payload section.
                return url_or_path
            data = base64.b64decode(encoded)
            if len(data) < min_file_size_bytes:
                return url_or_path
        else:
            # stat() and read are blocking; keep them off the event loop.
            data = await asyncio.to_thread(
                _read_image_file_if_large,
                Path(url_or_path),
                min_file_size_bytes,
            )

        if not data:
            return url_or_path

        temp_dir = Path(get_astrbot_temp_path())
        temp_dir.mkdir(parents=True, exist_ok=True)

        # Offload the blocking image processing task to a thread.
        return await asyncio.to_thread(
            _compress_image_sync,
            data,
            temp_dir,
            max_size,
            quality,
            optimize,
        )
    except Exception as exc:  # noqa: BLE001 - best effort, see docstring
        # The source is deliberately not logged: it may be a multi-megabyte
        # data URI.
        logger.warning("Image compression failed, using original image: %s", exc)
        return url_or_path
18 changes: 17 additions & 1 deletion dashboard/src/i18n/locales/en-US/features/config-metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,22 @@
"description": "User Prompt",
"hint": "You can use {{prompt}} as a placeholder for user input. If no placeholder is provided, it will be added before the user input."
},
"image_compress_enabled": {
"description": "Enable image compression",
"hint": "When enabled, large local images are compressed before being sent to multimodal models. Applies only to chat_completion providers."
},
"image_compress_options": {
"description": "Image compression settings",
"hint": "Control image resize limits, JPEG quality, and the minimum size threshold for compression.",
"max_size": {
"description": "Maximum edge length",
"hint": "Longest edge of the compressed image in pixels. Images larger than this are resized proportionally."
},
"quality": {
"description": "JPEG quality",
"hint": "JPEG output quality from 1 to 100. Higher values preserve more detail but produce larger files."
}
},
"reachability_check": {
"description": "Provider Reachability Check",
"hint": "When running the /provider command, test provider connectivity in parallel. This actively pings models and may consume extra tokens."
Expand Down Expand Up @@ -1523,4 +1539,4 @@
"helpMiddle": "or",
"helpSuffix": "."
}
}
}
16 changes: 16 additions & 0 deletions dashboard/src/i18n/locales/ru-RU/features/config-metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,22 @@
"description": "Промпт пользователя",
"hint": "Вы можете использовать {{prompt}} как заполнитель для ввода. Если заполнитель не указан, он будет добавлен перед текстом пользователя."
},
"image_compress_enabled": {
"description": "Включить сжатие изображений",
"hint": "Когда включено, большие локальные изображения сжимаются перед отправкой в мультимодальные модели. Применяется только к провайдерам chat_completion."
},
"image_compress_options": {
"description": "Настройки сжатия изображений",
"hint": "Управляет ограничением размера, качеством JPEG и минимальным порогом размера для сжатия.",
"max_size": {
"description": "Максимальная длина стороны",
"hint": "Максимальная длина стороны сжатого изображения в пикселях. Более крупные изображения пропорционально уменьшаются."
},
"quality": {
"description": "Качество JPEG",
"hint": "Качество JPEG от 1 до 100. Более высокие значения сохраняют больше деталей, но увеличивают размер файла."
}
},
"reachability_check": {
"description": "Проверка доступности провайдеров",
"hint": "При выполнении команды /provider проверяет связь со всеми моделями. Это может расходовать токены."
Expand Down
16 changes: 16 additions & 0 deletions dashboard/src/i18n/locales/zh-CN/features/config-metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,22 @@
"description": "用户提示词",
"hint": "可使用 {{prompt}} 作为用户输入的占位符。如果不输入占位符则代表添加在用户输入的前面。"
},
"image_compress_enabled": {
"description": "启用图片压缩",
"hint": "启用后,发送给多模态模型前会先压缩本地大图片。仅对 chat_completion 提供商生效。"
},
"image_compress_options": {
"description": "图片压缩配置",
"hint": "用于控制图片压缩的尺寸、质量和触发阈值。",
"max_size": {
"description": "最大边长",
"hint": "压缩后图片的最长边,单位为像素。超过该尺寸时会按比例缩放。"
},
"quality": {
"description": "JPEG 质量",
"hint": "JPEG 输出质量,范围为 1-100。值越高,画质越好,文件也越大。"
}
},
"reachability_check": {
"description": "提供商可达性检测",
"hint": "/provider 命令列出模型时并发检测连通性。开启后会主动调用模型测试连通性,可能产生额外 token 消耗。"
Expand Down
Loading