Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 78 additions & 5 deletions pkg/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ import (
"context"
"fmt"
"log/slog"
"os"
"os/exec"
"path/filepath"
"slices"
"strings"
"sync/atomic"
Expand Down Expand Up @@ -243,20 +245,91 @@ func (a *App) Run(ctx context.Context, cancel context.CancelFunc, message string

go func() {
if len(attachments) > 0 {
// Strip attachment placeholders from the message text
// Placeholders are in the format @/path/to/file
cleanMessage := message
for placeholder := range attachments {
cleanMessage = strings.ReplaceAll(cleanMessage, placeholder, "")
}
cleanMessage = strings.TrimSpace(cleanMessage)
if cleanMessage == "" {
cleanMessage = "Please analyze this attached file."
}

multiContent := []chat.MessagePart{
{
Type: chat.MessagePartTypeText,
Text: message,
Text: cleanMessage,
},
}

for key, dataURL := range attachments {
// Attachments are keyed by @filepath placeholder
// Extract the file path and add as file attachment for provider upload.
// Note: There is an inherent TOCTOU race between this validation and when
// the provider reads the file during upload. This validation catches common
// cases (deleted files, wrong paths) but files could still change before upload.
for placeholder := range attachments {
filePath := strings.TrimPrefix(placeholder, "@")
if filePath == "" {
slog.Debug("skipping attachment with empty file path", "placeholder", placeholder)
continue
}

// Convert to absolute path to ensure consistency with provider upload code
// and prevent issues if working directory changes between validation and upload
absPath, err := filepath.Abs(filePath)
if err != nil {
slog.Warn("skipping attachment: invalid path", "path", filePath, "error", err)
a.events <- runtime.Warning(fmt.Sprintf("Skipped attachment %s: invalid path", filePath), "")
continue
}

fi, err := os.Stat(absPath)
if err != nil {
var reason string
switch {
case os.IsNotExist(err):
reason = "file does not exist"
case os.IsPermission(err):
reason = "permission denied"
default:
reason = fmt.Sprintf("cannot access file: %v", err)
}
slog.Warn("skipping attachment", "path", absPath, "reason", reason)
a.events <- runtime.Warning(fmt.Sprintf("Skipped attachment %s: %s", filePath, reason), "")
continue
}

if !fi.Mode().IsRegular() {
slog.Warn("skipping attachment: not a regular file", "path", absPath, "mode", fi.Mode().String())
a.events <- runtime.Warning(fmt.Sprintf("Skipped attachment %s: not a regular file", filePath), "")
continue
}

const maxAttachmentSize = 100 * 1024 * 1024 // 100MB
if fi.Size() > maxAttachmentSize {
slog.Warn("skipping attachment: file too large", "path", absPath, "size", fi.Size(), "max", maxAttachmentSize)
a.events <- runtime.Warning(fmt.Sprintf("Skipped attachment %s: file too large (max 100MB)", filePath), "")
continue
}

mimeType := chat.DetectMimeType(absPath)
if !chat.IsSupportedMimeType(mimeType) {
slog.Warn("skipping attachment: unsupported file type", "path", absPath, "mime_type", mimeType)
a.events <- runtime.Warning(fmt.Sprintf("Skipped attachment %s: unsupported file type (supported: images, pdf, txt, md)", filePath), "")
continue
}

multiContent = append(multiContent, chat.MessagePart{
Type: chat.MessagePartTypeText,
Text: fmt.Sprintf("Contents of %s: %s", key, dataURL),
Type: chat.MessagePartTypeFile,
File: &chat.MessageFile{
Path: absPath,
MimeType: mimeType,
},
})
}
a.session.AddMessage(session.UserMessage(message, multiContent...))

a.session.AddMessage(session.UserMessage(cleanMessage, multiContent...))
} else {
a.session.AddMessage(session.UserMessage(message))
}
Expand Down
57 changes: 56 additions & 1 deletion pkg/chat/chat.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
package chat

import "github.com/docker/cagent/pkg/tools"
import (
"path/filepath"
"strings"

"github.com/docker/cagent/pkg/tools"
)

type MessageRole string

Expand All @@ -16,6 +21,7 @@ type MessagePartType string
// Values of MessagePartType. Each one names the kind of content carried by a
// MessagePart.
const (
// MessagePartTypeText marks a part whose content is the Text field.
MessagePartTypeText MessagePartType = "text"
// MessagePartTypeImageURL marks a part whose content is the ImageURL field.
MessagePartTypeImageURL MessagePartType = "image_url"
// MessagePartTypeFile marks a part whose content is the File field, i.e. a
// file attachment referenced by path and MIME type.
MessagePartTypeFile MessagePartType = "file"
)

type ImageURLDetail string
Expand Down Expand Up @@ -74,10 +80,18 @@ type Message struct {
CacheControl bool `json:"cache_control,omitempty"`
}

// MessageFile represents a file attachment that can be uploaded to a provider's file storage.
// The file is carried by reference (path and MIME type) rather than by content.
// Every field is omitted from the JSON encoding when empty.
// NOTE(review): FileID is presumably filled in by provider code once the upload
// succeeds; nothing visible here sets it — confirm against the provider packages.
type MessageFile struct {
Path string `json:"path,omitempty"` // Local file path (used for upload)
FileID string `json:"file_id,omitempty"` // Provider-specific file ID (after upload)
MimeType string `json:"mime_type,omitempty"` // MIME type of the file
}

// MessagePart is one element of a multi-part message.
// Type names the kind of content; the matching payload field holds it.
// NOTE(review): presumably only the field that corresponds to Type is set on
// any given part — verify against the code that builds and consumes parts.
type MessagePart struct {
// Type selects which of the payload fields below is meaningful.
Type MessagePartType `json:"type,omitempty"`
// Text is the payload for text parts.
Text string `json:"text,omitempty"`
// ImageURL is the payload for image_url parts.
ImageURL *MessageImageURL `json:"image_url,omitempty"`
// File is the payload for file parts (attachment referenced by path).
File *MessageFile `json:"file,omitempty"`
}

// FinishReason represents the reason why the model finished generating a response
Expand Down Expand Up @@ -145,3 +159,44 @@ type MessageStream interface {
// Close closes the stream
Close()
}

// attachmentMimeTypes maps a lower-cased file extension (including the leading
// dot) to the MIME type reported for it by DetectMimeType.
var attachmentMimeTypes = map[string]string{
	// Images
	".jpg":  "image/jpeg",
	".jpeg": "image/jpeg",
	".png":  "image/png",
	".gif":  "image/gif",
	".webp": "image/webp",

	// Documents
	".pdf":      "application/pdf",
	".txt":      "text/plain",
	".json":     "text/plain",
	".csv":      "text/plain",
	".md":       "text/markdown",
	".markdown": "text/markdown",
}

// DetectMimeType reports the MIME type of filePath, judged solely by its
// extension (matched case-insensitively); the file itself is never opened.
// Only extensions of attachment types the package supports are recognised.
// Anything else, including a path with no extension, yields
// "application/octet-stream".
func DetectMimeType(filePath string) string {
	if mimeType, ok := attachmentMimeTypes[strings.ToLower(filepath.Ext(filePath))]; ok {
		return mimeType
	}
	return "application/octet-stream"
}

// IsSupportedMimeType reports whether mimeType may be sent as a file attachment.
// The accepted set is the JPEG, PNG, GIF and WebP image types plus PDF, plain
// text and Markdown documents. The comparison is exact, so any other string
// (including "application/octet-stream" and the empty string) is rejected.
func IsSupportedMimeType(mimeType string) bool {
	switch mimeType {
	case "image/jpeg", "image/png", "image/gif", "image/webp",
		"application/pdf", "text/plain", "text/markdown":
		return true
	}
	return false
}
71 changes: 19 additions & 52 deletions pkg/cli/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package cli
import (
"cmp"
"context"
"encoding/base64"
"encoding/json"
"fmt"
"io"
Expand Down Expand Up @@ -316,78 +315,46 @@ func ParseAttachCommand(userInput string) (messageText, attachPath string) {
return messageText, attachPath
}

// CreateUserMessageWithAttachment creates a user message with optional image attachment
// CreateUserMessageWithAttachment creates a user message with optional file attachment.
// The attachment is stored as a file reference (path + MIME type) rather than base64-encoded
// content. The actual upload to the provider's file storage happens at request time.
func CreateUserMessageWithAttachment(userContent, attachmentPath string) *session.Message {
if attachmentPath == "" {
return session.UserMessage(userContent)
}

// Convert file to data URL
dataURL, err := fileToDataURL(attachmentPath)
// Validate file exists
absPath, err := filepath.Abs(attachmentPath)
if err != nil {
slog.Warn("Failed to attach file", "path", attachmentPath, "error", err)
slog.Warn("Failed to get absolute path for attachment", "path", attachmentPath, "error", err)
return session.UserMessage(userContent)
}

if _, err := os.Stat(absPath); os.IsNotExist(err) {
slog.Warn("Attachment file does not exist", "path", absPath)
return session.UserMessage(userContent)
}

// Determine MIME type
mimeType := chat.DetectMimeType(absPath)

// Ensure we have some text content when attaching a file
textContent := cmp.Or(strings.TrimSpace(userContent), "Please analyze this attached file.")

// Create message with multi-content including text and image
// Create message with multi-content including text and file reference
multiContent := []chat.MessagePart{
{
Type: chat.MessagePartTypeText,
Text: textContent,
},
{
Type: chat.MessagePartTypeImageURL,
ImageURL: &chat.MessageImageURL{
URL: dataURL,
Detail: chat.ImageURLDetailAuto,
Type: chat.MessagePartTypeFile,
File: &chat.MessageFile{
Path: absPath,
MimeType: mimeType,
},
},
}

return session.UserMessage("", multiContent...)
}

// fileToDataURL reads the image at filePath and returns its content as a
// "data:<mime>;base64,<payload>" URL.
//
// The MIME type is chosen from the file extension (case-insensitive); only
// JPEG, PNG, GIF, WebP, BMP and SVG are accepted. An error is returned when the
// file does not exist, cannot be read, or has any other extension. The file is
// read before the extension is examined, so a missing or unreadable file is
// reported ahead of an unsupported format.
func fileToDataURL(filePath string) (string, error) {
	// Report a missing file with a dedicated message before attempting the read.
	if _, statErr := os.Stat(filePath); os.IsNotExist(statErr) {
		return "", fmt.Errorf("file does not exist: %s", filePath)
	}

	content, readErr := os.ReadFile(filePath)
	if readErr != nil {
		return "", fmt.Errorf("failed to read file: %w", readErr)
	}

	// Extension-to-MIME table for the accepted image formats.
	imageTypes := map[string]string{
		".jpg":  "image/jpeg",
		".jpeg": "image/jpeg",
		".png":  "image/png",
		".gif":  "image/gif",
		".webp": "image/webp",
		".bmp":  "image/bmp",
		".svg":  "image/svg+xml",
	}

	suffix := strings.ToLower(filepath.Ext(filePath))
	mediaType, known := imageTypes[suffix]
	if !known {
		return "", fmt.Errorf("unsupported image format: %s", suffix)
	}

	payload := base64.StdEncoding.EncodeToString(content)
	return "data:" + mediaType + ";base64," + payload, nil
}
20 changes: 18 additions & 2 deletions pkg/model/provider/anthropic/beta_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,11 @@ func (c *Client) createBetaStream(
return nil, err
}

converted := convertBetaMessages(messages)
converted, err := c.convertBetaMessages(ctx, messages)
if err != nil {
slog.Error("Failed to convert messages for Anthropic Beta request", "error", err)
return nil, err
}
if err := validateAnthropicSequencingBeta(converted); err != nil {
slog.Warn("Invalid message sequencing for Anthropic Beta API detected, attempting self-repair", "error", err)
converted = repairAnthropicSequencingBeta(converted)
Expand All @@ -50,13 +54,25 @@ func (c *Client) createBetaStream(

sys := extractBetaSystemBlocks(messages)

// Check if messages contain file attachments to include the files-api beta header
needsFilesAPI := hasFileAttachments(messages)

betas := []anthropic.AnthropicBeta{
anthropic.AnthropicBetaInterleavedThinking2025_05_14,
"fine-grained-tool-streaming-2025-05-14",
}
if needsFilesAPI {
betas = append(betas, filesAPIBeta)
slog.Debug("Anthropic Beta API: Including files-api beta header for file attachments")
}

params := anthropic.BetaMessageNewParams{
Model: anthropic.Model(c.ModelConfig.Model),
MaxTokens: maxTokens,
System: sys,
Messages: converted,
Tools: allTools,
Betas: []anthropic.AnthropicBeta{anthropic.AnthropicBetaInterleavedThinking2025_05_14, "fine-grained-tool-streaming-2025-05-14"},
Betas: betas,
}

// Apply structured output configuration
Expand Down
9 changes: 6 additions & 3 deletions pkg/model/provider/anthropic/beta_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,8 @@ func TestConvertBetaMessages_UserMessage(t *testing.T) {
},
}

converted := convertBetaMessages(msgs)
converted, err := testClient().convertBetaMessages(t.Context(), msgs)
require.NoError(t, err)

require.Len(t, converted, 1)
assert.Equal(t, anthropic.BetaMessageParamRoleUser, converted[0].Role)
Expand All @@ -282,7 +283,8 @@ func TestConvertBetaMessages_SkipsSystemMessages(t *testing.T) {
},
}

converted := convertBetaMessages(msgs)
converted, err := testClient().convertBetaMessages(t.Context(), msgs)
require.NoError(t, err)

require.Len(t, converted, 1)
assert.Equal(t, anthropic.BetaMessageParamRoleUser, converted[0].Role)
Expand All @@ -297,7 +299,8 @@ func TestConvertBetaMessages_AssistantMessage(t *testing.T) {
},
}

converted := convertBetaMessages(msgs)
converted, err := testClient().convertBetaMessages(t.Context(), msgs)
require.NoError(t, err)

require.Len(t, converted, 1)
assert.Equal(t, anthropic.BetaMessageParamRoleAssistant, converted[0].Role)
Expand Down
Loading
Loading