Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 44 additions & 33 deletions pkg/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -250,38 +250,23 @@ func (a *App) Run(ctx context.Context, cancel context.CancelFunc, message string

go func() {
if len(attachments) > 0 {
// Strip attachment placeholders from the message text
// Placeholders are in the format @/path/to/file
cleanMessage := message
for placeholder := range attachments {
cleanMessage = strings.ReplaceAll(cleanMessage, placeholder, "")
}
cleanMessage = strings.TrimSpace(cleanMessage)
if cleanMessage == "" {
cleanMessage = "Please analyze this attached file."
}
// Build a single text string with the user's message and inlined text files.
// Keeping everything in one text block ensures the model sees file content
// together with the message, rather than as separate content blocks.
var textBuilder strings.Builder
textBuilder.WriteString(message)

multiContent := []chat.MessagePart{
{
Type: chat.MessagePartTypeText,
Text: cleanMessage,
},
}
// multiContent holds the combined text part plus any binary file parts.
var binaryParts []chat.MessagePart

// Attachments are keyed by @filepath placeholder
// Extract the file path and add as file attachment for provider upload.
// Note: There is an inherent TOCTOU race between this validation and when
// the provider reads the file during upload. This validation catches common
// cases (deleted files, wrong paths) but files could still change before upload.
// Attachments are keyed by @filepath placeholder.
for placeholder := range attachments {
filePath := strings.TrimPrefix(placeholder, "@")
if filePath == "" {
slog.Debug("skipping attachment with empty file path", "placeholder", placeholder)
continue
}

// Convert to absolute path to ensure consistency with provider upload code
// and prevent issues if working directory changes between validation and upload
absPath, err := filepath.Abs(filePath)
if err != nil {
slog.Warn("skipping attachment: invalid path", "path", filePath, "error", err)
Expand Down Expand Up @@ -319,22 +304,48 @@ func (a *App) Run(ctx context.Context, cancel context.CancelFunc, message string
}

mimeType := chat.DetectMimeType(absPath)
if !chat.IsSupportedMimeType(mimeType) {

switch {
case chat.IsTextFile(absPath):
// Text files are appended to the message text so the model
// sees them in a single content block alongside the user's question.
if fi.Size() > chat.MaxInlineFileSize {
slog.Warn("skipping attachment: text file too large to inline", "path", absPath, "size", fi.Size(), "max", chat.MaxInlineFileSize)
a.events <- runtime.Warning(fmt.Sprintf("Skipped attachment %s: text file too large to inline (max 1MB)", filePath), "")
continue
}
content, err := chat.ReadFileForInline(absPath)
if err != nil {
slog.Warn("skipping attachment: failed to read file", "path", absPath, "error", err)
a.events <- runtime.Warning(fmt.Sprintf("Skipped attachment %s: failed to read file", filePath), "")
continue
}
textBuilder.WriteString("\n\n")
textBuilder.WriteString(content)

case chat.IsSupportedMimeType(mimeType):
// Binary files (images, PDFs) are kept as separate file parts.
binaryParts = append(binaryParts, chat.MessagePart{
Type: chat.MessagePartTypeFile,
File: &chat.MessageFile{
Path: absPath,
MimeType: mimeType,
},
})

default:
slog.Warn("skipping attachment: unsupported file type", "path", absPath, "mime_type", mimeType)
a.events <- runtime.Warning(fmt.Sprintf("Skipped attachment %s: unsupported file type (supported: images, pdf, txt, md)", filePath), "")
a.events <- runtime.Warning(fmt.Sprintf("Skipped attachment %s: unsupported file type", filePath), "")
continue
}
}

multiContent = append(multiContent, chat.MessagePart{
Type: chat.MessagePartTypeFile,
File: &chat.MessageFile{
Path: absPath,
MimeType: mimeType,
},
})
multiContent := []chat.MessagePart{
{Type: chat.MessagePartTypeText, Text: textBuilder.String()},
}
multiContent = append(multiContent, binaryParts...)

a.session.AddMessage(session.UserMessage(cleanMessage, multiContent...))
a.session.AddMessage(session.UserMessage(message, multiContent...))
} else {
a.session.AddMessage(session.UserMessage(message))
}
Expand Down
161 changes: 153 additions & 8 deletions pkg/chat/chat.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
package chat

import (
"fmt"
"os"
"path/filepath"
"strings"
"unicode/utf8"

"github.com/docker/cagent/pkg/tools"
)

// MaxInlineFileSize is the maximum size of a text file that can be inlined
// directly into a message. Files larger than this are skipped with a warning.
// This is much smaller than the general upload limit because inline content
// expands token usage significantly.
const MaxInlineFileSize = 5 * 1024 * 1024 // 5MB

type MessageRole string

const (
Expand Down Expand Up @@ -162,8 +171,9 @@ type MessageStream interface {

// DetectMimeType returns the MIME type for a file based on its extension.
// This is the canonical implementation used across all packages for consistency.
// Note: Only returns MIME types that are supported for file attachments.
// Unsupported extensions return "application/octet-stream".
// For binary file types (images, PDF), returns the specific MIME type.
// For text-based files, returns "text/plain".
// Unrecognized extensions return "application/octet-stream".
func DetectMimeType(filePath string) string {
ext := strings.ToLower(filepath.Ext(filePath))
switch ext {
Expand All @@ -176,14 +186,13 @@ func DetectMimeType(filePath string) string {
return "image/gif"
case ".webp":
return "image/webp"
// Documents
// PDF
case ".pdf":
return "application/pdf"
case ".txt", ".json", ".csv":
return "text/plain"
case ".md", ".markdown":
return "text/markdown"
default:
if isTextExtension(ext) {
return "text/plain"
}
return "application/octet-stream"
}
}
Expand All @@ -194,9 +203,145 @@ func IsSupportedMimeType(mimeType string) bool {
switch mimeType {
case "image/jpeg", "image/png", "image/gif", "image/webp":
return true
case "application/pdf", "text/plain", "text/markdown":
case "application/pdf", "text/plain":
return true
default:
return false
}
}

// IsTextFile determines if a file at the given path is a text file that should
// be inlined into the message rather than uploaded via a provider's file API.
// It first checks the file extension against a broad allowlist of known text
// extensions. For unknown extensions, it falls back to reading the first 8KB
// and checking if the content is valid UTF-8 with no null bytes.
func IsTextFile(filePath string) bool {
ext := strings.ToLower(filepath.Ext(filePath))
if isTextExtension(ext) {
return true
}
// For unknown extensions, try byte-sniffing
return isTextByContent(filePath)
}

// ReadFileForInline reads a text file and wraps it in an XML-like tag with
// the file path for context. Returns the formatted content and any error.
func ReadFileForInline(filePath string) (string, error) {
data, err := os.ReadFile(filePath)
if err != nil {
return "", fmt.Errorf("failed to read file: %w", err)
}
return fmt.Sprintf("<attached_file path=%q>\n%s\n</attached_file>", filePath, string(data)), nil
}

// isTextExtension returns true for file extensions known to be text-based.
// This includes programming languages, config files, markup, and data formats.
func isTextExtension(ext string) bool {
switch ext {
// Plain text and documentation
case ".txt", ".text", ".log",
".md", ".markdown", ".mdown", ".mkd", ".mdx",
".rst", ".adoc", ".asciidoc",
".org", ".wiki", ".tex", ".latex",
// Web
".html", ".htm", ".xhtml", ".css", ".scss", ".sass", ".less",
".js", ".jsx", ".mjs", ".cjs",
".ts", ".tsx", ".mts", ".cts",
".vue", ".svelte", ".astro",
// Programming languages
".go", ".py", ".pyi", ".pyw",
".rb", ".rbw",
".rs",
".java", ".kt", ".kts", ".groovy", ".scala", ".clj", ".cljs",
".c", ".h", ".cpp", ".cc", ".cxx", ".hpp", ".hxx", ".hh",
".cs", ".fs", ".fsx",
".swift", ".m", ".mm",
".r", ".R",
".lua",
".pl", ".pm", ".perl",
".php",
".ex", ".exs",
".erl", ".hrl",
".hs", ".lhs",
".ml", ".mli",
".nim",
".zig",
".v", ".sv",
".dart",
".jl",
".d",
".pas", ".pp",
".lisp", ".cl", ".el",
".tcl",
".awk",
".sql",
// Shell and scripting
".sh", ".bash", ".zsh", ".fish", ".ksh", ".csh",
".ps1", ".psm1", ".psd1",
".bat", ".cmd",
// Data and config formats
".json", ".jsonl", ".ndjson", ".json5", ".jsonc",
".yaml", ".yml",
".toml",
".xml", ".xsl", ".xslt", ".xsd", ".dtd",
".csv", ".tsv",
".ini", ".cfg", ".conf", ".config",
".env", ".envrc",
".properties",
".plist",
".hcl", ".tf", ".tfvars",
// Build and project files
".makefile", ".mk",
".cmake",
".gradle",
".sbt",
".cabal",
".gemspec",
".podspec",
// Docker and container
".dockerfile",
".containerfile",
// Git
".gitignore", ".gitattributes", ".gitmodules",
// Editor and IDE config
".editorconfig",
".eslintrc", ".prettierrc", ".stylelintrc",
// Other
".proto", ".thrift", ".avsc",
".graphql", ".gql",
".svg",
".diff", ".patch":
return true
default:
return false
}
}

// isTextByContent reads the first 8KB of a file and checks if it looks like text.
// A file is considered text if it's valid UTF-8 and contains no null bytes.
func isTextByContent(filePath string) bool {
f, err := os.Open(filePath)
if err != nil {
return false
}
defer f.Close()

buf := make([]byte, 8*1024)
n, _ := f.Read(buf)
if n == 0 {
// Empty files are treated as text
return true
}

data := buf[:n]

// Check for null bytes (strong binary indicator)
for _, b := range data {
if b == 0 {
return false
}
}

// Check if the content is valid UTF-8
return utf8.Valid(data)
}
Loading