Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions pkg/runtime/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -1093,9 +1093,9 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c
Cost: messageCost,
Model: messageModel,
}
- }
- if res.RateLimit != nil {
- msgUsage.RateLimit = *res.RateLimit
+ if res.RateLimit != nil {
+ msgUsage.RateLimit = *res.RateLimit
+ }
}

addAgentMessage(sess, a, &assistantMessage, events)
Expand Down Expand Up @@ -1270,6 +1270,7 @@ func (r *LocalRuntime) handleStream(ctx context.Context, stream chat.MessageStre
var actualModelEventEmitted bool
var messageUsage *chat.Usage
var messageRateLimit *chat.RateLimit
+ var prevStreamCost float64 // cost contributed by previous usage emission in this stream

modelID := getAgentModelID(a)
toolCallIndex := make(map[string]int) // toolCallID -> index in toolCalls slice
Expand All @@ -1292,11 +1293,12 @@ func (r *LocalRuntime) handleStream(ctx context.Context, stream chat.MessageStre
messageUsage = response.Usage

if m != nil && m.Cost != nil {
- cost := float64(response.Usage.InputTokens)*m.Cost.Input +
+ streamCost := (float64(response.Usage.InputTokens)*m.Cost.Input +
  float64(response.Usage.OutputTokens)*m.Cost.Output +
  float64(response.Usage.CachedInputTokens)*m.Cost.CacheRead +
- float64(response.Usage.CacheWriteTokens)*m.Cost.CacheWrite
- sess.Cost += cost / 1e6
+ float64(response.Usage.CacheWriteTokens)*m.Cost.CacheWrite) / 1e6
+ sess.Cost += streamCost - prevStreamCost
+ prevStreamCost = streamCost
}

sess.InputTokens = response.Usage.InputTokens + response.Usage.CachedInputTokens + response.Usage.CacheWriteTokens
Expand Down