From 799b15bf8ba387586e2bdf73b15fa0d76649a15e Mon Sep 17 00:00:00 2001
From: Ernesto Martínez
Date: Mon, 8 Dec 2025 00:46:03 +0100
Subject: [PATCH] fix: Ensure message_start is emitted before any
 content_block events in Claude SSE responses.

---
 .../openai/claude/openai_claude_response.go  | 43 ++++++++++++++++---
 1 file changed, 36 insertions(+), 7 deletions(-)

diff --git a/pkg/llmproxy/translator/openai/claude/openai_claude_response.go b/pkg/llmproxy/translator/openai/claude/openai_claude_response.go
index 4f0c8d983e..e1f78fbc27 100644
--- a/pkg/llmproxy/translator/openai/claude/openai_claude_response.go
+++ b/pkg/llmproxy/translator/openai/claude/openai_claude_response.go
@@ -8,6 +8,7 @@ package claude
 import (
 	"bytes"
 	"context"
+	"encoding/json"
 	"fmt"
 	"strings"
 
@@ -132,16 +133,40 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
 		param.CreatedAt = root.Get("created").Int()
 	}
 
-	// Emit message_start on the very first chunk, regardless of whether it has a role field.
-	// Some providers (like Copilot) may send tool_calls in the first chunk without a role field.
+	// Helper to ensure message_start is sent before any content_block_start.
+	// This is required by the Anthropic SSE protocol - message_start must come first.
+	// Some OpenAI-compatible providers (like GitHub Copilot) may not send role: "assistant"
+	// in the first chunk, so we need to emit message_start when we first see content.
+	ensureMessageStarted := func() {
+		if param.MessageStarted {
+			return
+		}
+		messageStart := map[string]interface{}{
+			"type": "message_start",
+			"message": map[string]interface{}{
+				"id":            param.MessageID,
+				"type":          "message",
+				"role":          "assistant",
+				"model":         param.Model,
+				"content":       []interface{}{},
+				"stop_reason":   nil,
+				"stop_sequence": nil,
+				"usage": map[string]interface{}{
+					"input_tokens":  0,
+					"output_tokens": 0,
+				},
+			},
+		}
+		messageStartJSON, _ := json.Marshal(messageStart)
+		results = append(results, "event: message_start\ndata: "+string(messageStartJSON)+"\n\n")
+		param.MessageStarted = true
+	}
+
+	// Check if this is the first chunk (has role)
 	if delta := root.Get("choices.0.delta"); delta.Exists() {
 		if !param.MessageStarted {
 			// Send message_start event
-			messageStartJSON := `{"type":"message_start","message":{"id":"","type":"message","role":"assistant","model":"","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":0,"output_tokens":0}}}`
-			messageStartJSON, _ = sjson.Set(messageStartJSON, "message.id", param.MessageID)
-			messageStartJSON, _ = sjson.Set(messageStartJSON, "message.model", param.Model)
-			results = append(results, "event: message_start\ndata: "+messageStartJSON+"\n\n")
-			param.MessageStarted = true
+			ensureMessageStarted()
 
 			// Don't send content_block_start for text here - wait for actual content
 		}
@@ -154,6 +179,7 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
 			}
 			stopTextContentBlock(param, &results)
 			if !param.ThinkingContentBlockStarted {
+				ensureMessageStarted() // Must send message_start before content_block_start
 				if param.ThinkingContentBlockIndex == -1 {
 					param.ThinkingContentBlockIndex = param.NextContentBlockIndex
 					param.NextContentBlockIndex++
@@ -175,6 +201,7 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
 		if content := delta.Get("content"); content.Exists() && content.String() != "" {
 			// Send content_block_start for text if not already sent
 			if !param.TextContentBlockStarted {
+				ensureMessageStarted() // Must send message_start before content_block_start
 				stopThinkingContentBlock(param, &results)
 				if param.TextContentBlockIndex == -1 {
 					param.TextContentBlockIndex = param.NextContentBlockIndex
@@ -222,6 +249,8 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
 				if name := function.Get("name"); name.Exists() {
 					accumulator.Name = name.String()
 
+					ensureMessageStarted() // Must send message_start before content_block_start
+
 					stopThinkingContentBlock(param, &results)
 					stopTextContentBlock(param, &results)
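
Reviewer note (not part of the patch): below is a minimal, self-contained Go
sketch of the failure mode this fixes and of the guarantee the new helper
provides. demoState, the trimmed message_start payload, and the sample IDs are
illustrative stand-ins, not the translator's real param struct or output; the
triggering chunk shape mirrors what GitHub Copilot sends (tool_calls in the
first delta, with no role field).

package main

import (
	"encoding/json"
	"fmt"
)

// demoState is a hypothetical stand-in for the translator's param struct.
type demoState struct {
	MessageStarted bool
	MessageID      string
	Model          string
}

// ensureMessageStarted mirrors the helper added by this patch: whichever
// branch reaches it first (role, text, thinking, or tool_calls) emits
// message_start exactly once before any content_block_start.
func ensureMessageStarted(p *demoState, results *[]string) {
	if p.MessageStarted {
		return
	}
	msg := map[string]interface{}{
		"type": "message_start",
		"message": map[string]interface{}{
			"id":    p.MessageID,
			"role":  "assistant",
			"model": p.Model,
		},
	}
	b, _ := json.Marshal(msg)
	*results = append(*results, "event: message_start\ndata: "+string(b)+"\n\n")
	p.MessageStarted = true
}

func main() {
	p := &demoState{MessageID: "msg_demo", Model: "claude-demo"}
	var results []string

	// Copilot-style first chunk: tool_calls but no "role" in choices[0].delta.
	// Before this patch, only the role check emitted message_start, so such a
	// chunk produced content_block_start first, violating the Anthropic SSE
	// ordering. The guard call below is what the tool_call branch now does.
	ensureMessageStarted(p, &results)
	results = append(results,
		"event: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0}\n\n")

	for _, ev := range results {
		fmt.Print(ev)
	}
}

Running the sketch prints message_start followed by content_block_start, and
any later call to ensureMessageStarted is a no-op, which is why the patch can
safely place the guard in front of every content_block_start site.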