diff --git a/pkg/app/app.go b/pkg/app/app.go index 218f844a6..8e79d6782 100644 --- a/pkg/app/app.go +++ b/pkg/app/app.go @@ -250,29 +250,16 @@ func (a *App) Run(ctx context.Context, cancel context.CancelFunc, message string go func() { if len(attachments) > 0 { - // Strip attachment placeholders from the message text - // Placeholders are in the format @/path/to/file - cleanMessage := message - for placeholder := range attachments { - cleanMessage = strings.ReplaceAll(cleanMessage, placeholder, "") - } - cleanMessage = strings.TrimSpace(cleanMessage) - if cleanMessage == "" { - cleanMessage = "Please analyze this attached file." - } + // Build a single text string with the user's message and inlined text files. + // Keeping everything in one text block ensures the model sees file content + // together with the message, rather than as separate content blocks. + var textBuilder strings.Builder + textBuilder.WriteString(message) - multiContent := []chat.MessagePart{ - { - Type: chat.MessagePartTypeText, - Text: cleanMessage, - }, - } + // multiContent holds the combined text part plus any binary file parts. + var binaryParts []chat.MessagePart - // Attachments are keyed by @filepath placeholder - // Extract the file path and add as file attachment for provider upload. - // Note: There is an inherent TOCTOU race between this validation and when - // the provider reads the file during upload. This validation catches common - // cases (deleted files, wrong paths) but files could still change before upload. + // Attachments are keyed by @filepath placeholder. for placeholder := range attachments { filePath := strings.TrimPrefix(placeholder, "@") if filePath == "" { @@ -280,8 +267,6 @@ func (a *App) Run(ctx context.Context, cancel context.CancelFunc, message string continue } - // Convert to absolute path to ensure consistency with provider upload code - // and prevent issues if working directory changes between validation and upload absPath, err := filepath.Abs(filePath) if err != nil { slog.Warn("skipping attachment: invalid path", "path", filePath, "error", err) @@ -319,22 +304,48 @@ func (a *App) Run(ctx context.Context, cancel context.CancelFunc, message string } mimeType := chat.DetectMimeType(absPath) - if !chat.IsSupportedMimeType(mimeType) { + + switch { + case chat.IsTextFile(absPath): + // Text files are appended to the message text so the model + // sees them in a single content block alongside the user's question. + if fi.Size() > chat.MaxInlineFileSize { + slog.Warn("skipping attachment: text file too large to inline", "path", absPath, "size", fi.Size(), "max", chat.MaxInlineFileSize) + a.events <- runtime.Warning(fmt.Sprintf("Skipped attachment %s: text file too large to inline (max 1MB)", filePath), "") + continue + } + content, err := chat.ReadFileForInline(absPath) + if err != nil { + slog.Warn("skipping attachment: failed to read file", "path", absPath, "error", err) + a.events <- runtime.Warning(fmt.Sprintf("Skipped attachment %s: failed to read file", filePath), "") + continue + } + textBuilder.WriteString("\n\n") + textBuilder.WriteString(content) + + case chat.IsSupportedMimeType(mimeType): + // Binary files (images, PDFs) are kept as separate file parts. + binaryParts = append(binaryParts, chat.MessagePart{ + Type: chat.MessagePartTypeFile, + File: &chat.MessageFile{ + Path: absPath, + MimeType: mimeType, + }, + }) + + default: slog.Warn("skipping attachment: unsupported file type", "path", absPath, "mime_type", mimeType) - a.events <- runtime.Warning(fmt.Sprintf("Skipped attachment %s: unsupported file type (supported: images, pdf, txt, md)", filePath), "") + a.events <- runtime.Warning(fmt.Sprintf("Skipped attachment %s: unsupported file type", filePath), "") continue } + } - multiContent = append(multiContent, chat.MessagePart{ - Type: chat.MessagePartTypeFile, - File: &chat.MessageFile{ - Path: absPath, - MimeType: mimeType, - }, - }) + multiContent := []chat.MessagePart{ + {Type: chat.MessagePartTypeText, Text: textBuilder.String()}, } + multiContent = append(multiContent, binaryParts...) - a.session.AddMessage(session.UserMessage(cleanMessage, multiContent...)) + a.session.AddMessage(session.UserMessage(message, multiContent...)) } else { a.session.AddMessage(session.UserMessage(message)) } diff --git a/pkg/chat/chat.go b/pkg/chat/chat.go index 882295d55..9762243d0 100644 --- a/pkg/chat/chat.go +++ b/pkg/chat/chat.go @@ -1,12 +1,21 @@ package chat import ( + "fmt" + "os" "path/filepath" "strings" + "unicode/utf8" "github.com/docker/cagent/pkg/tools" ) +// MaxInlineFileSize is the maximum size of a text file that can be inlined +// directly into a message. Files larger than this are skipped with a warning. +// This is much smaller than the general upload limit because inline content +// expands token usage significantly. +const MaxInlineFileSize = 5 * 1024 * 1024 // 5MB + type MessageRole string const ( @@ -162,8 +171,9 @@ type MessageStream interface { // DetectMimeType returns the MIME type for a file based on its extension. // This is the canonical implementation used across all packages for consistency. -// Note: Only returns MIME types that are supported for file attachments. -// Unsupported extensions return "application/octet-stream". +// For binary file types (images, PDF), returns the specific MIME type. +// For text-based files, returns "text/plain". +// Unrecognized extensions return "application/octet-stream". func DetectMimeType(filePath string) string { ext := strings.ToLower(filepath.Ext(filePath)) switch ext { @@ -176,14 +186,13 @@ func DetectMimeType(filePath string) string { return "image/gif" case ".webp": return "image/webp" - // Documents + // PDF case ".pdf": return "application/pdf" - case ".txt", ".json", ".csv": - return "text/plain" - case ".md", ".markdown": - return "text/markdown" default: + if isTextExtension(ext) { + return "text/plain" + } return "application/octet-stream" } } @@ -194,9 +203,145 @@ func IsSupportedMimeType(mimeType string) bool { switch mimeType { case "image/jpeg", "image/png", "image/gif", "image/webp": return true - case "application/pdf", "text/plain", "text/markdown": + case "application/pdf", "text/plain": return true default: return false } } + +// IsTextFile determines if a file at the given path is a text file that should +// be inlined into the message rather than uploaded via a provider's file API. +// It first checks the file extension against a broad allowlist of known text +// extensions. For unknown extensions, it falls back to reading the first 8KB +// and checking if the content is valid UTF-8 with no null bytes. +func IsTextFile(filePath string) bool { + ext := strings.ToLower(filepath.Ext(filePath)) + if isTextExtension(ext) { + return true + } + // For unknown extensions, try byte-sniffing + return isTextByContent(filePath) +} + +// ReadFileForInline reads a text file and wraps it in an XML-like tag with +// the file path for context. Returns the formatted content and any error. +func ReadFileForInline(filePath string) (string, error) { + data, err := os.ReadFile(filePath) + if err != nil { + return "", fmt.Errorf("failed to read file: %w", err) + } + return fmt.Sprintf("\n%s\n", filePath, string(data)), nil +} + +// isTextExtension returns true for file extensions known to be text-based. +// This includes programming languages, config files, markup, and data formats. +func isTextExtension(ext string) bool { + switch ext { + // Plain text and documentation + case ".txt", ".text", ".log", + ".md", ".markdown", ".mdown", ".mkd", ".mdx", + ".rst", ".adoc", ".asciidoc", + ".org", ".wiki", ".tex", ".latex", + // Web + ".html", ".htm", ".xhtml", ".css", ".scss", ".sass", ".less", + ".js", ".jsx", ".mjs", ".cjs", + ".ts", ".tsx", ".mts", ".cts", + ".vue", ".svelte", ".astro", + // Programming languages + ".go", ".py", ".pyi", ".pyw", + ".rb", ".rbw", + ".rs", + ".java", ".kt", ".kts", ".groovy", ".scala", ".clj", ".cljs", + ".c", ".h", ".cpp", ".cc", ".cxx", ".hpp", ".hxx", ".hh", + ".cs", ".fs", ".fsx", + ".swift", ".m", ".mm", + ".r", ".R", + ".lua", + ".pl", ".pm", ".perl", + ".php", + ".ex", ".exs", + ".erl", ".hrl", + ".hs", ".lhs", + ".ml", ".mli", + ".nim", + ".zig", + ".v", ".sv", + ".dart", + ".jl", + ".d", + ".pas", ".pp", + ".lisp", ".cl", ".el", + ".tcl", + ".awk", + ".sql", + // Shell and scripting + ".sh", ".bash", ".zsh", ".fish", ".ksh", ".csh", + ".ps1", ".psm1", ".psd1", + ".bat", ".cmd", + // Data and config formats + ".json", ".jsonl", ".ndjson", ".json5", ".jsonc", + ".yaml", ".yml", + ".toml", + ".xml", ".xsl", ".xslt", ".xsd", ".dtd", + ".csv", ".tsv", + ".ini", ".cfg", ".conf", ".config", + ".env", ".envrc", + ".properties", + ".plist", + ".hcl", ".tf", ".tfvars", + // Build and project files + ".makefile", ".mk", + ".cmake", + ".gradle", + ".sbt", + ".cabal", + ".gemspec", + ".podspec", + // Docker and container + ".dockerfile", + ".containerfile", + // Git + ".gitignore", ".gitattributes", ".gitmodules", + // Editor and IDE config + ".editorconfig", + ".eslintrc", ".prettierrc", ".stylelintrc", + // Other + ".proto", ".thrift", ".avsc", + ".graphql", ".gql", + ".svg", + ".diff", ".patch": + return true + default: + return false + } +} + +// isTextByContent reads the first 8KB of a file and checks if it looks like text. +// A file is considered text if it's valid UTF-8 and contains no null bytes. +func isTextByContent(filePath string) bool { + f, err := os.Open(filePath) + if err != nil { + return false + } + defer f.Close() + + buf := make([]byte, 8*1024) + n, _ := f.Read(buf) + if n == 0 { + // Empty files are treated as text + return true + } + + data := buf[:n] + + // Check for null bytes (strong binary indicator) + for _, b := range data { + if b == 0 { + return false + } + } + + // Check if the content is valid UTF-8 + return utf8.Valid(data) +} diff --git a/pkg/chat/chat_test.go b/pkg/chat/chat_test.go new file mode 100644 index 000000000..d32d87412 --- /dev/null +++ b/pkg/chat/chat_test.go @@ -0,0 +1,171 @@ +package chat + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDetectMimeType(t *testing.T) { + t.Parallel() + tests := []struct { + path string + expected string + }{ + // Images + {"photo.jpg", "image/jpeg"}, + {"photo.jpeg", "image/jpeg"}, + {"photo.png", "image/png"}, + {"photo.gif", "image/gif"}, + {"photo.webp", "image/webp"}, + // PDF + {"document.pdf", "application/pdf"}, + // Text files - all map to text/plain + {"readme.txt", "text/plain"}, + {"readme.md", "text/plain"}, + {"readme.markdown", "text/plain"}, + {"data.json", "text/plain"}, + {"data.csv", "text/plain"}, + {"main.go", "text/plain"}, + {"script.py", "text/plain"}, + {"config.yaml", "text/plain"}, + {"Makefile.mk", "text/plain"}, + {"page.html", "text/plain"}, + {"style.css", "text/plain"}, + {"app.ts", "text/plain"}, + {"app.tsx", "text/plain"}, + {"lib.rs", "text/plain"}, + {"Main.java", "text/plain"}, + {"script.sh", "text/plain"}, + {"config.toml", "text/plain"}, + {"schema.sql", "text/plain"}, + {"Dockerfile.dockerfile", "text/plain"}, + {"query.graphql", "text/plain"}, + {"icon.svg", "text/plain"}, + {"changes.diff", "text/plain"}, + // Unknown binary + {"archive.tar.gz", "application/octet-stream"}, + {"program.exe", "application/octet-stream"}, + {"movie.mp4", "application/octet-stream"}, + } + + for _, tt := range tests { + t.Run(tt.path, func(t *testing.T) { + t.Parallel() + result := DetectMimeType(tt.path) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestIsSupportedMimeType(t *testing.T) { + t.Parallel() + tests := []struct { + mimeType string + expected bool + }{ + {"image/jpeg", true}, + {"image/png", true}, + {"image/gif", true}, + {"image/webp", true}, + {"application/pdf", true}, + {"text/plain", true}, + {"text/markdown", false}, + {"application/octet-stream", false}, + {"video/mp4", false}, + } + + for _, tt := range tests { + t.Run(tt.mimeType, func(t *testing.T) { + t.Parallel() + result := IsSupportedMimeType(tt.mimeType) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestIsTextFile_KnownExtensions(t *testing.T) { + t.Parallel() + textFiles := []string{ + "main.go", "script.py", "app.ts", "lib.rs", "Main.java", + "readme.md", "doc.txt", "data.json", "config.yaml", + "style.css", "page.html", "query.sql", "script.sh", + "config.toml", "schema.xml", "data.csv", "notes.org", + "code.cpp", "header.h", "module.ex", "func.hs", + "app.swift", "code.kt", "app.dart", "code.zig", + ".gitignore", "Makefile.mk", "query.graphql", + } + + for _, f := range textFiles { + t.Run(f, func(t *testing.T) { + t.Parallel() + assert.True(t, IsTextFile(f), "expected %s to be detected as text", f) + }) + } +} + +func TestIsTextFile_KnownBinaryExtensions(t *testing.T) { + t.Parallel() + // Binary extensions are not in the text allowlist and won't match byte-sniffing + // if the file doesn't exist (IsTextFile returns false for unreadable files) + binaryFiles := []string{ + "archive.tar.gz", "program.exe", "movie.mp4", "image.png", + } + + for _, f := range binaryFiles { + t.Run(f, func(t *testing.T) { + t.Parallel() + // These files don't exist, so byte-sniffing can't run either + assert.False(t, IsTextFile(f), "expected %s to not be detected as text", f) + }) + } +} + +func TestIsTextFile_ByteSniffing(t *testing.T) { + t.Parallel() + tmpDir := t.TempDir() + + // File with unknown extension but text content + textFile := filepath.Join(tmpDir, "data.custom") + err := os.WriteFile(textFile, []byte("This is plain text content\nwith multiple lines\n"), 0o644) + require.NoError(t, err) + assert.True(t, IsTextFile(textFile), "text content with unknown extension should be detected as text") + + // File with unknown extension and binary content (null bytes) + binFile := filepath.Join(tmpDir, "data.custom2") + err = os.WriteFile(binFile, []byte{0x00, 0x01, 0x02, 0x03, 0xFF}, 0o644) + require.NoError(t, err) + assert.False(t, IsTextFile(binFile), "binary content should not be detected as text") + + // Empty file should be treated as text + emptyFile := filepath.Join(tmpDir, "empty.custom") + err = os.WriteFile(emptyFile, []byte{}, 0o644) + require.NoError(t, err) + assert.True(t, IsTextFile(emptyFile), "empty file should be treated as text") +} + +func TestReadFileForInline(t *testing.T) { + t.Parallel() + tmpDir := t.TempDir() + + testFile := filepath.Join(tmpDir, "example.go") + content := "package main\n\nfunc main() {}\n" + err := os.WriteFile(testFile, []byte(content), 0o644) + require.NoError(t, err) + + result, err := ReadFileForInline(testFile) + require.NoError(t, err) + + assert.Contains(t, result, ``) + assert.Contains(t, result, content) + assert.Contains(t, result, ``) +} + +func TestReadFileForInline_NotFound(t *testing.T) { + t.Parallel() + _, err := ReadFileForInline("/nonexistent/file.txt") + assert.Error(t, err) +} diff --git a/pkg/cli/runner.go b/pkg/cli/runner.go index 5a15782ac..acf5ec9e8 100644 --- a/pkg/cli/runner.go +++ b/pkg/cli/runner.go @@ -316,8 +316,8 @@ func ParseAttachCommand(userInput string) (messageText, attachPath string) { } // CreateUserMessageWithAttachment creates a user message with optional file attachment. -// The attachment is stored as a file reference (path + MIME type) rather than base64-encoded -// content. The actual upload to the provider's file storage happens at request time. +// Text files are inlined directly as text content for cross-provider compatibility. +// Binary files (images, PDFs) are stored as file references for provider-specific upload. func CreateUserMessageWithAttachment(userContent, attachmentPath string) *session.Message { if attachmentPath == "" { return session.UserMessage(userContent) @@ -330,31 +330,54 @@ func CreateUserMessageWithAttachment(userContent, attachmentPath string) *sessio return session.UserMessage(userContent) } - if _, err := os.Stat(absPath); os.IsNotExist(err) { - slog.Warn("Attachment file does not exist", "path", absPath) + fi, err := os.Stat(absPath) + if err != nil { + slog.Warn("Attachment file not accessible", "path", absPath, "error", err) return session.UserMessage(userContent) } - // Determine MIME type - mimeType := chat.DetectMimeType(absPath) - // Ensure we have some text content when attaching a file textContent := cmp.Or(strings.TrimSpace(userContent), "Please analyze this attached file.") - // Create message with multi-content including text and file reference multiContent := []chat.MessagePart{ { Type: chat.MessagePartTypeText, Text: textContent, }, - { + } + + switch { + case chat.IsTextFile(absPath): + // Text files are inlined directly as text content. + if fi.Size() > chat.MaxInlineFileSize { + slog.Warn("Attachment text file too large to inline", "path", absPath, "size", fi.Size()) + return session.UserMessage(userContent) + } + content, err := chat.ReadFileForInline(absPath) + if err != nil { + slog.Warn("Failed to read attachment file", "path", absPath, "error", err) + return session.UserMessage(userContent) + } + multiContent = append(multiContent, chat.MessagePart{ + Type: chat.MessagePartTypeText, + Text: content, + }) + + default: + // Binary files (images, PDFs) are kept as file references. + mimeType := chat.DetectMimeType(absPath) + if !chat.IsSupportedMimeType(mimeType) { + slog.Warn("Unsupported attachment file type", "path", absPath, "mime_type", mimeType) + return session.UserMessage(userContent) + } + multiContent = append(multiContent, chat.MessagePart{ Type: chat.MessagePartTypeFile, File: &chat.MessageFile{ Path: absPath, MimeType: mimeType, }, - }, + }) } - return session.UserMessage("", multiContent...) + return session.UserMessage(textContent, multiContent...) } diff --git a/pkg/model/provider/anthropic/beta_converter.go b/pkg/model/provider/anthropic/beta_converter.go index 1844dbf5c..e8601017a 100644 --- a/pkg/model/provider/anthropic/beta_converter.go +++ b/pkg/model/provider/anthropic/beta_converter.go @@ -263,7 +263,7 @@ func createBetaFileContentBlock(fileID, mimeType string) (anthropic.BetaContentB }, nil } - if IsDocumentMime(mimeType) { + if IsAnthropicDocumentMime(mimeType) { return anthropic.BetaContentBlockParamUnion{ OfDocument: &anthropic.BetaRequestDocumentBlockParam{ Source: anthropic.BetaRequestDocumentBlockSourceUnionParam{ diff --git a/pkg/model/provider/anthropic/files.go b/pkg/model/provider/anthropic/files.go index 609585a46..46bc14f65 100644 --- a/pkg/model/provider/anthropic/files.go +++ b/pkg/model/provider/anthropic/files.go @@ -398,10 +398,10 @@ func IsImageMime(mimeType string) bool { } } -// IsDocumentMime returns true if the MIME type is a document type supported by Anthropic. -func IsDocumentMime(mimeType string) bool { +// IsAnthropicDocumentMime returns true if the MIME type is a document type supported by Anthropic. +func IsAnthropicDocumentMime(mimeType string) bool { switch mimeType { - case "application/pdf", "text/plain", "text/markdown": + case "application/pdf", "text/plain": return true default: return false diff --git a/pkg/model/provider/anthropic/files_test.go b/pkg/model/provider/anthropic/files_test.go index 8c0ba84b5..d835c0235 100644 --- a/pkg/model/provider/anthropic/files_test.go +++ b/pkg/model/provider/anthropic/files_test.go @@ -24,8 +24,8 @@ func TestDetectMimeType(t *testing.T) { {"image.webp", "image/webp"}, {"document.pdf", "application/pdf"}, {"readme.txt", "text/plain"}, - {"readme.md", "text/markdown"}, - {"readme.markdown", "text/markdown"}, + {"readme.md", "text/plain"}, + {"readme.markdown", "text/plain"}, // json and csv are treated as text/plain for provider compatibility {"data.json", "text/plain"}, {"data.csv", "text/plain"}, @@ -69,7 +69,7 @@ func TestIsDocumentMime(t *testing.T) { }{ {"application/pdf", true}, {"text/plain", true}, - {"text/markdown", true}, + {"text/markdown", false}, {"image/jpeg", false}, {"image/png", false}, {"application/octet-stream", false}, @@ -77,7 +77,7 @@ func TestIsDocumentMime(t *testing.T) { for _, tt := range tests { t.Run(tt.mimeType, func(t *testing.T) { - result := IsDocumentMime(tt.mimeType) + result := IsAnthropicDocumentMime(tt.mimeType) assert.Equal(t, tt.expected, result) }) } @@ -94,7 +94,7 @@ func TestIsSupportedMime(t *testing.T) { {"image/webp", true}, {"application/pdf", true}, {"text/plain", true}, - {"text/markdown", true}, + {"text/markdown", false}, {"application/json", false}, {"application/octet-stream", false}, } diff --git a/pkg/tui/components/editor/editor.go b/pkg/tui/components/editor/editor.go index 75f0cf0e7..1679a2bcb 100644 --- a/pkg/tui/components/editor/editor.go +++ b/pkg/tui/components/editor/editor.go @@ -620,6 +620,17 @@ func (e *editor) Update(msg tea.Msg) (layout.Model, tea.Cmd) { case completion.SelectedMsg: // If the item has an Execute function, run it instead of inserting text if msg.Execute != nil { + // Remove the trigger character and any typed completion word from the textarea + // before executing. For example, typing "@" then selecting "Browse files..." + // should remove the "@" so AttachFile doesn't produce a double "@@". + if e.currentCompletion != nil { + triggerWord := e.currentCompletion.Trigger() + e.completionWord + currentValue := e.textarea.Value() + if idx := strings.LastIndex(currentValue, triggerWord); idx >= 0 { + e.textarea.SetValue(currentValue[:idx] + currentValue[idx+len(triggerWord):]) + e.textarea.MoveToEnd() + } + } e.clearSuggestion() return e, msg.Execute() } @@ -639,7 +650,7 @@ func (e *editor) Update(msg tea.Msg) (layout.Model, tea.Cmd) { // For non-auto-submit completions (like file paths), replace the completion word currentValue := e.textarea.Value() if lastIdx := strings.LastIndex(currentValue, e.completionWord); lastIdx >= 0 { - newValue := currentValue[:lastIdx-1] + msg.Value + currentValue[lastIdx+len(e.completionWord):] + newValue := currentValue[:lastIdx-1] + msg.Value + " " + currentValue[lastIdx+len(e.completionWord):] e.textarea.SetValue(newValue) e.textarea.MoveToEnd() }