From 1732b75bcb0f0f1a5cb9173fa0370adfdbfcc286 Mon Sep 17 00:00:00 2001 From: Andrew Kent Date: Tue, 18 Nov 2025 20:19:31 -0700 Subject: [PATCH] add time to first token metric for anthropic instrumentation --- .../otel/InstrumentedMessageService.java | 13 ++++++++++++- .../anthropic/otel/StreamListener.java | 17 ++++++++++++++++- .../anthropic/BraintrustAnthropicTest.java | 14 ++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/main/java/dev/braintrust/instrumentation/anthropic/otel/InstrumentedMessageService.java b/src/main/java/dev/braintrust/instrumentation/anthropic/otel/InstrumentedMessageService.java index bf0e70e..69b9c57 100644 --- a/src/main/java/dev/braintrust/instrumentation/anthropic/otel/InstrumentedMessageService.java +++ b/src/main/java/dev/braintrust/instrumentation/anthropic/otel/InstrumentedMessageService.java @@ -79,6 +79,7 @@ private Message create(MessageCreateParams inputMessage, RequestOptions requestO } Context context = instrumenter.start(parentContext, inputMessage); + long startTimeNanos = System.nanoTime(); Message outputMessage; try (Scope ignored = context.makeCurrent()) { Span currentSpan = Span.current(); @@ -96,6 +97,10 @@ private Message create(MessageCreateParams inputMessage, RequestOptions requestO } BraintrustAnthropicSpanAttributes.setInputMessages(currentSpan, inputMessages); outputMessage = delegate.create(inputMessage, requestOptions); + long endTimeNanos = System.nanoTime(); + double timeToFirstTokenSeconds = (endTimeNanos - startTimeNanos) / 1_000_000_000.0; + currentSpan.setAttribute( + "braintrust.metrics.time_to_first_token", timeToFirstTokenSeconds); BraintrustAnthropicSpanAttributes.setOutputMessage(Span.current(), outputMessage); } catch (Throwable t) { instrumenter.end(context, inputMessage, null, t); @@ -143,12 +148,18 @@ private StreamResponse createStreamingWithAttributes( } BraintrustAnthropicSpanAttributes.setInputMessages(span, inputMessages); + long startTimeNanos = 
System.nanoTime(); StreamResponse result = delegate.createStreaming(inputMessage, requestOptions); return new TracingStreamedResponse( result, new StreamListener( - context, inputMessage, instrumenter, captureMessageContent, newSpan)); + context, + inputMessage, + instrumenter, + captureMessageContent, + newSpan, + startTimeNanos)); } private static String contentToString(MessageCreateParams.System content) { diff --git a/src/main/java/dev/braintrust/instrumentation/anthropic/otel/StreamListener.java b/src/main/java/dev/braintrust/instrumentation/anthropic/otel/StreamListener.java index a93ada5..b504f1d 100644 --- a/src/main/java/dev/braintrust/instrumentation/anthropic/otel/StreamListener.java +++ b/src/main/java/dev/braintrust/instrumentation/anthropic/otel/StreamListener.java @@ -27,6 +27,7 @@ final class StreamListener { private final boolean captureMessageContent; private final boolean newSpan; private final AtomicBoolean hasEnded; + private final long startTimeNanos; private final StringBuilder contentBuilder = new StringBuilder(); @@ -35,23 +36,32 @@ final class StreamListener { @Nullable private Model model; @Nullable private String responseId; @Nullable private String stopReason; + @Nullable private Double timeToFirstToken; StreamListener( Context context, MessageCreateParams request, Instrumenter instrumenter, boolean captureMessageContent, - boolean newSpan) { + boolean newSpan, + long startTimeNanos) { this.context = context; this.request = request; this.instrumenter = instrumenter; this.captureMessageContent = captureMessageContent; this.newSpan = newSpan; + this.startTimeNanos = startTimeNanos; hasEnded = new AtomicBoolean(); } @SneakyThrows void onEvent(RawMessageStreamEvent event) { + // On the first event of any type, record seconds elapsed since startTimeNanos as TTFT + if (timeToFirstToken == null) { + long firstEventTimeNanos = System.nanoTime(); + timeToFirstToken = (firstEventTimeNanos - startTimeNanos) / 1_000_000_000.0; + } + // Handle message_start event if 
(event.messageStart().isPresent()) { var messageStart = event.messageStart().get(); @@ -144,6 +154,11 @@ void endSpan(@Nullable Throwable error) { span.setAttribute("gen_ai.usage.output_tokens", usage.outputTokens()); } + // Attach time to first token only when onEvent recorded one (null if no event arrived) + if (timeToFirstToken != null) { + span.setAttribute("braintrust.metrics.time_to_first_token", timeToFirstToken); + } + instrumenter.end(context, request, null, error); } } diff --git a/src/test/java/dev/braintrust/instrumentation/anthropic/BraintrustAnthropicTest.java b/src/test/java/dev/braintrust/instrumentation/anthropic/BraintrustAnthropicTest.java index c28a71a..b4d2764 100644 --- a/src/test/java/dev/braintrust/instrumentation/anthropic/BraintrustAnthropicTest.java +++ b/src/test/java/dev/braintrust/instrumentation/anthropic/BraintrustAnthropicTest.java @@ -152,6 +152,13 @@ void testWrapAnthropic() { outputMessage.get("content").get(0).get("text").asText()); assertEquals(8, outputMessage.get("usage").get("output_tokens").asInt()); assertEquals(20, outputMessage.get("usage").get("input_tokens").asInt()); + + // Verify the span reports a non-negative time_to_first_token double attribute + Double timeToFirstToken = + span.getAttributes() + .get(AttributeKey.doubleKey("braintrust.metrics.time_to_first_token")); + assertNotNull(timeToFirstToken, "time_to_first_token should be present"); + assertTrue(timeToFirstToken >= 0.0, "time_to_first_token should be non-negative"); } @Test @@ -280,5 +287,12 @@ void testWrapAnthropicStreaming() { var messageZero = outputMessages.get(0); assertEquals("assistant", messageZero.get("role").asText()); assertEquals("The capital of France is Paris.", messageZero.get("content").asText()); + + // Verify the span reports a non-negative time_to_first_token double attribute + Double timeToFirstToken = + span.getAttributes() + .get(AttributeKey.doubleKey("braintrust.metrics.time_to_first_token")); + assertNotNull(timeToFirstToken, "time_to_first_token should be present for streaming"); + assertTrue(timeToFirstToken >= 0.0, "time_to_first_token should be non-negative"); } }