From de2bfcfd9b04f349b8ededd58ed2ff7681b3a039 Mon Sep 17 00:00:00 2001 From: antdev <237216263+yumesha@users.noreply.github.com> Date: Mon, 16 Feb 2026 20:38:46 +0800 Subject: [PATCH] feat(ui): add video clipboard paste support for shell and print mode - Add ClipboardVideo class and grab_video_from_clipboard() in clipboard.py - Add video support to AttachmentCache with store_video_reference() - Add _try_paste_video() method in prompt.py for Ctrl-V handling - Add _extract_video_paths() and _build_content_parts() in print/__init__.py - Export VIDEO_EXTENSIONS constant from file utils - Update documentation for video clipboard paste - Add tests for video attachment handling --- docs/en/guides/interaction.md | 22 ++++- docs/zh/guides/interaction.md | 22 ++++- src/kimi_cli/tools/file/utils.py | 3 + src/kimi_cli/ui/print/__init__.py | 94 +++++++++++++++++++- src/kimi_cli/ui/shell/prompt.py | 87 ++++++++++++++++++- src/kimi_cli/utils/clipboard.py | 74 ++++++++++++++++ tests/test_attachment_cache.py | 59 +++++++++++++ tests/test_print_video.py | 139 ++++++++++++++++++++++++++++++ 8 files changed, 489 insertions(+), 11 deletions(-) create mode 100644 tests/test_print_video.py diff --git a/docs/en/guides/interaction.md b/docs/en/guides/interaction.md index 432af4390..ff325ae76 100644 --- a/docs/en/guides/interaction.md +++ b/docs/en/guides/interaction.md @@ -45,14 +45,30 @@ Sometimes you need to enter multiple lines, such as pasting a code snippet or er After finishing your input, press `Enter` to send the complete message. -## Clipboard and image paste +## Clipboard and media paste -Press `Ctrl-V` to paste text or images from the clipboard. +Press `Ctrl-V` to paste text, images, or video files from the clipboard. -If the clipboard contains an image, Kimi Code CLI will automatically add the image as an attachment to the message. After sending the message, the AI can see and analyze the image. 
+If the clipboard contains an **image**, Kimi Code CLI will automatically add the image as an attachment to the message. After sending the message, the AI can see and analyze the image. + +If the clipboard contains a **video file path**, Kimi Code CLI will insert a reference to the video file. The AI can then use the `ReadMediaFile` tool to read and analyze the video content. + +Supported video formats include: MP4, MKV, AVI, MOV, WMV, WebM, M4V, FLV, 3GP, and 3G2. + +### Video input in print mode (non-interactive) + +When using [print mode](../customization/print-mode.md) with `-c` or `--command`, you can reference video files directly: + +```sh +kimi --print -c "Analyze this video /path/to/video.mp4" +``` + +Kimi Code CLI will automatically detect video file paths in your command and make them available to the AI for analysis. ::: tip Image input requires the model to support the `image_in` capability. Video input requires the `video_in` capability. + +Models like [Kimi K2.5](https://huggingface.co/moonshotai/Kimi-K2.5) support video understanding with strong performance on benchmarks like VideoMMMU (86.6), VideoMME (87.4), and LongVideoBench (79.8). 
::: ## Slash commands diff --git a/docs/zh/guides/interaction.md b/docs/zh/guides/interaction.md index af1e8f007..6e50cae45 100644 --- a/docs/zh/guides/interaction.md +++ b/docs/zh/guides/interaction.md @@ -45,14 +45,30 @@ Thinking 模式需要当前模型支持。部分模型(如 `kimi-k2-thinking-t 输入完成后,按 `Enter` 发送整条消息。 -## 剪贴板与图片粘贴 +## 剪贴板与媒体粘贴 -按 `Ctrl-V` 可以粘贴剪贴板中的文本或图片。 +按 `Ctrl-V` 可以粘贴剪贴板中的文本、图片或视频文件。 -如果剪贴板中是图片,Kimi Code CLI 会自动将图片作为附件添加到消息中。发送消息后,AI 可以看到并分析这张图片。 +如果剪贴板中是**图片**,Kimi Code CLI 会自动将图片作为附件添加到消息中。发送消息后,AI 可以看到并分析这张图片。 + +如果剪贴板中是**视频文件路径**,Kimi Code CLI 会插入该视频文件的引用。AI 随后可以使用 `ReadMediaFile` 工具读取和分析视频内容。 + +支持的视频格式包括:MP4、MKV、AVI、MOV、WMV、WebM、M4V、FLV、3GP 和 3G2。 + +### Print 模式(非交互式)中的视频输入 + +在使用 [Print 模式](../customization/print-mode.md) 配合 `-c` 或 `--command` 时,你可以直接引用视频文件: + +```sh +kimi --print -c "分析这个视频 /path/to/video.mp4" +``` + +Kimi Code CLI 会自动检测命令中的视频文件路径,并让 AI 进行分析。 ::: tip 提示 图片输入需要当前模型支持 `image_in` 能力,视频输入需要支持 `video_in` 能力。 + +像 [Kimi K2.5](https://huggingface.co/moonshotai/Kimi-K2.5) 这样的模型支持视频理解,在 VideoMMMU (86.6)、VideoMME (87.4) 和 LongVideoBench (79.8) 等基准测试中表现优异。 ::: ## 斜杠命令 diff --git a/src/kimi_cli/tools/file/utils.py b/src/kimi_cli/tools/file/utils.py index d674f8989..2f58e6e04 100644 --- a/src/kimi_cli/tools/file/utils.py +++ b/src/kimi_cli/tools/file/utils.py @@ -52,6 +52,9 @@ ".3gp": "video/3gpp", ".3g2": "video/3gpp2", } + +# Public export for video extensions mapping +VIDEO_EXTENSIONS = _VIDEO_MIME_BY_SUFFIX _TEXT_MIME_BY_SUFFIX = { ".svg": "image/svg+xml", } diff --git a/src/kimi_cli/ui/print/__init__.py b/src/kimi_cli/ui/print/__init__.py index b48466bfc..501740b87 100644 --- a/src/kimi_cli/ui/print/__init__.py +++ b/src/kimi_cli/ui/print/__init__.py @@ -2,12 +2,13 @@ import asyncio import json +import re import sys from functools import partial from pathlib import Path from kosong.chat_provider import ChatProviderError -from kosong.message import Message +from kosong.message import ContentPart, Message, TextPart from rich import print from kimi_cli.cli 
def _extract_video_paths(text: str) -> list[tuple[int, int, Path]]:
    """Locate references to existing video files inside free-form text.

    Each occurrence of a known video extension (a key of ``VIDEO_EXTENSIONS``,
    matched case-insensitively) that is followed by whitespace, the end of the
    string, or one of ``. , ; ! ?`` is treated as the possible end of a path.
    For every such occurrence the candidate start positions are tried from
    left to right, so the longest candidate is checked first and the first
    candidate that names an existing regular file wins. This is what lets a
    file name containing spaces be recognised as a whole instead of only its
    last word.

    A candidate may start at the beginning of ``text`` or directly after a
    space, tab, or newline. A leading ``@`` is part of the reported span but is
    stripped before the path is checked on disk.

    Args:
        text: The text to scan.

    Returns:
        ``(start, end, path)`` tuples in order of appearance, where
        ``text[start:end]`` is the matched span (including any ``@`` prefix)
        and ``path`` is the file it refers to. Spans never overlap. Candidates
        that do not exist as regular files are not reported.
    """
    results: list[tuple[int, int, Path]] = []

    # Escape each extension so it is always matched literally, whatever
    # characters the mapping may contain.
    extensions = "|".join(re.escape(ext.lstrip(".")) for ext in VIDEO_EXTENSIONS)
    # No \b here: a lookahead for whitespace / end / sentence punctuation
    # behaves predictably next to characters such as "[".
    pattern = re.compile(rf"\.({extensions})(?=\s|$|[.,;!?])", re.IGNORECASE)

    # Candidates never start before the end of the previously reported span,
    # which keeps the returned spans from overlapping.
    search_from = 0

    for match in pattern.finditer(text):
        ext_end = match.end()

        for start in range(search_from, ext_end):
            # A path may only begin at the start of the text or after whitespace.
            if start > 0 and text[start - 1] not in " \t\n":
                continue

            raw = text[start:ext_end]
            # The "@" mention prefix belongs to the span, not to the path.
            candidate = Path(raw[1:] if raw.startswith("@") else raw)

            # Rejects candidates such as a bare ".mp4", whose suffix is empty.
            if candidate.suffix.lower() not in VIDEO_EXTENSIONS:
                continue

            try:
                exists = candidate.is_file()
            except (OSError, ValueError):
                # Early candidates are large slices of prose; depending on the
                # Python version and platform, probing them can raise (for
                # example for an over-long name) instead of returning False.
                exists = False

            if exists:
                # Starts are visited left to right, so the first hit is the
                # longest existing path ending at this extension.
                results.append((start, ext_end, candidate))
                search_from = ext_end
                break

    return results