Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ private Message create(MessageCreateParams inputMessage, RequestOptions requestO
}

Context context = instrumenter.start(parentContext, inputMessage);
long startTimeNanos = System.nanoTime();
Message outputMessage;
try (Scope ignored = context.makeCurrent()) {
Span currentSpan = Span.current();
Expand All @@ -96,6 +97,10 @@ private Message create(MessageCreateParams inputMessage, RequestOptions requestO
}
BraintrustAnthropicSpanAttributes.setInputMessages(currentSpan, inputMessages);
outputMessage = delegate.create(inputMessage, requestOptions);
long endTimeNanos = System.nanoTime();
double timeToFirstTokenSeconds = (endTimeNanos - startTimeNanos) / 1_000_000_000.0;
currentSpan.setAttribute(
"braintrust.metrics.time_to_first_token", timeToFirstTokenSeconds);
BraintrustAnthropicSpanAttributes.setOutputMessage(Span.current(), outputMessage);
} catch (Throwable t) {
instrumenter.end(context, inputMessage, null, t);
Expand Down Expand Up @@ -143,12 +148,18 @@ private StreamResponse<RawMessageStreamEvent> createStreamingWithAttributes(
}
BraintrustAnthropicSpanAttributes.setInputMessages(span, inputMessages);

long startTimeNanos = System.nanoTime();
StreamResponse<RawMessageStreamEvent> result =
delegate.createStreaming(inputMessage, requestOptions);
return new TracingStreamedResponse(
result,
new StreamListener(
context, inputMessage, instrumenter, captureMessageContent, newSpan));
context,
inputMessage,
instrumenter,
captureMessageContent,
newSpan,
startTimeNanos));
}

private static String contentToString(MessageCreateParams.System content) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ final class StreamListener {
private final boolean captureMessageContent;
private final boolean newSpan;
private final AtomicBoolean hasEnded;
private final long startTimeNanos;

private final StringBuilder contentBuilder = new StringBuilder();

Expand All @@ -35,23 +36,32 @@ final class StreamListener {
@Nullable private Model model;
@Nullable private String responseId;
@Nullable private String stopReason;
@Nullable private Double timeToFirstToken;

/**
 * Creates a listener and stores the supplied collaborators in its final fields.
 *
 * <p>{@code hasEnded} is initialised to a fresh {@link AtomicBoolean}, whose initial value is
 * {@code false}.
 *
 * @param context stored as-is; later handed to {@code instrumenter.end(...)} in {@code endSpan}
 * @param request stored as-is; later handed to {@code instrumenter.end(...)} in {@code endSpan}
 * @param instrumenter stored as-is; {@code endSpan} calls {@code end} on it
 * @param captureMessageContent stored as-is; its use is not visible in this excerpt
 * @param newSpan stored as-is; its use is not visible in this excerpt
 * @param startTimeNanos a {@code System.nanoTime()} reading taken by the caller before the
 *     streaming request is issued; {@code onEvent} subtracts it from the {@code nanoTime} of
 *     the first event to derive time-to-first-token in seconds
 */
StreamListener(
Context context,
MessageCreateParams request,
Instrumenter<MessageCreateParams, Message> instrumenter,
boolean captureMessageContent,
boolean newSpan) {
// NOTE(review): the line above and the two lines below both declare `newSpan` and close the
// parameter list; this looks like the removed and added sides of a diff rendered together.
// Presumably only the two-line form ending in `long startTimeNanos) {` is live - confirm
// against the real file.
boolean newSpan,
long startTimeNanos) {
this.context = context;
this.request = request;
this.instrumenter = instrumenter;
this.captureMessageContent = captureMessageContent;
this.newSpan = newSpan;
// Baseline for the time-to-first-token measurement computed in onEvent.
this.startTimeNanos = startTimeNanos;
hasEnded = new AtomicBoolean();
}

@SneakyThrows
void onEvent(RawMessageStreamEvent event) {
// Capture time to first token on the first event
if (timeToFirstToken == null) {
long firstEventTimeNanos = System.nanoTime();
timeToFirstToken = (firstEventTimeNanos - startTimeNanos) / 1_000_000_000.0;
}

// Handle message_start event
if (event.messageStart().isPresent()) {
var messageStart = event.messageStart().get();
Expand Down Expand Up @@ -144,6 +154,11 @@ void endSpan(@Nullable Throwable error) {
span.setAttribute("gen_ai.usage.output_tokens", usage.outputTokens());
}

// Set time to first token if captured
if (timeToFirstToken != null) {
span.setAttribute("braintrust.metrics.time_to_first_token", timeToFirstToken);
}

instrumenter.end(context, request, null, error);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,13 @@ void testWrapAnthropic() {
outputMessage.get("content").get(0).get("text").asText());
assertEquals(8, outputMessage.get("usage").get("output_tokens").asInt());
assertEquals(20, outputMessage.get("usage").get("input_tokens").asInt());

// Verify time to first token
Double timeToFirstToken =
span.getAttributes()
.get(AttributeKey.doubleKey("braintrust.metrics.time_to_first_token"));
assertNotNull(timeToFirstToken, "time_to_first_token should be present");
assertTrue(timeToFirstToken >= 0.0, "time_to_first_token should be non-negative");
}

@Test
Expand Down Expand Up @@ -280,5 +287,12 @@ void testWrapAnthropicStreaming() {
var messageZero = outputMessages.get(0);
assertEquals("assistant", messageZero.get("role").asText());
assertEquals("The capital of France is Paris.", messageZero.get("content").asText());

// Verify time to first token
Double timeToFirstToken =
span.getAttributes()
.get(AttributeKey.doubleKey("braintrust.metrics.time_to_first_token"));
assertNotNull(timeToFirstToken, "time_to_first_token should be present for streaming");
assertTrue(timeToFirstToken >= 0.0, "time_to_first_token should be non-negative");
}
}