From f2edb389d0ff06671bccc5f05e25c365d4fe3048 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 23 Jul 2025 10:00:48 +0200 Subject: [PATCH 01/27] Fixed span names to be otel compatible --- sentry_sdk/integrations/openai.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index bf310e5fdc..993be09a31 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -180,7 +180,7 @@ def _new_chat_completion_common(f, *args, **kwargs): span = sentry_sdk.start_span( op=consts.OP.GEN_AI_CHAT, - name=f"{consts.OP.GEN_AI_CHAT} {model}", + name=f"chat {model}", origin=OpenAIIntegration.origin, ) span.__enter__() @@ -365,7 +365,7 @@ def _new_embeddings_create_common(f, *args, **kwargs): with sentry_sdk.start_span( op=consts.OP.GEN_AI_EMBEDDINGS, - name=f"{consts.OP.GEN_AI_EMBEDDINGS} {model}", + name=f"embeddings {model}", origin=OpenAIIntegration.origin, ) as span: set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) @@ -491,7 +491,7 @@ def _new_responses_create_common(f, *args, **kwargs): span = sentry_sdk.start_span( op=consts.OP.GEN_AI_RESPONSES, - name=f"{consts.OP.GEN_AI_RESPONSES} {model}", + name=f"responses {model}", origin=OpenAIIntegration.origin, ) span.__enter__() From a29716b6b9fa18f7b8cf41df676e7a14a7f7590e Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 23 Jul 2025 10:21:32 +0200 Subject: [PATCH 02/27] Add common otel attributes --- sentry_sdk/integrations/openai.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 993be09a31..6fcf0ba5b7 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -124,13 +124,11 @@ def _calculate_token_usage( total_tokens = _get_usage(response.usage, ["total_tokens"]) # Manually count tokens - # TODO: when implementing responses API, check for responses API if input_tokens == 0: for message in messages: if "content" in message: input_tokens += count_tokens(message["content"]) - # TODO: when implementing responses API, check for responses API if output_tokens == 0: if streaming_message_responses is not None: for message in streaming_message_responses: @@ -191,7 +189,9 @@ def _new_chat_completion_common(f, *args, **kwargs): if should_send_default_pii() and integration.include_prompts: set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages) + set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") set_data_normalized(span, SPANDATA.AI_STREAMING, streaming) if hasattr(res, "choices"): @@ -368,7 +368,9 @@ def _new_embeddings_create_common(f, *args, **kwargs): name=f"embeddings {model}", origin=OpenAIIntegration.origin, ) as span: + set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "embeddings") if "input" in kwargs and ( should_send_default_pii() and integration.include_prompts @@ -496,7 +498,9 @@ def _new_responses_create_common(f, *args, **kwargs): ) span.__enter__() + set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") if should_send_default_pii() and 
integration.include_prompts: set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, input) From 5b11b7fbd2271a533b58b9369446e15ad6a5bb02 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 23 Jul 2025 13:50:32 +0200 Subject: [PATCH 03/27] attributes on chat completion --- sentry_sdk/consts.py | 2 +- sentry_sdk/integrations/openai.py | 45 +++++++++++++---- .../integrations/openai_agents/utils.py | 50 +------------------ sentry_sdk/utils.py | 46 +++++++++++++++++ 4 files changed, 84 insertions(+), 59 deletions(-) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 386aa1040e..4ad4acb64f 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -417,7 +417,7 @@ class SPANDATA: GEN_AI_REQUEST_MODEL = "gen_ai.request.model" """ The model identifier being used for the request. - Example: "gpt-4-turbo-preview" + Example: "gpt-4-turbo" """ GEN_AI_REQUEST_PRESENCE_PENALTY = "gen_ai.request.presence_penalty" diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 6fcf0ba5b7..a344039f38 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -11,6 +11,7 @@ from sentry_sdk.utils import ( capture_internal_exceptions, event_from_exception, + safe_serialize, ) from typing import TYPE_CHECKING @@ -183,24 +184,50 @@ def _new_chat_completion_common(f, *args, **kwargs): ) span.__enter__() + # Common attributes + set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") + set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") + set_data_normalized(span, SPANDATA.AI_STREAMING, streaming) + + # Optional attributes + max_tokens = kwargs.get("max_tokens") + if max_tokens is not None: + set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, max_tokens) + + presence_penalty = kwargs.get("presence_penalty") + if presence_penalty is not None: + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY, presence_penalty + ) + + temperature = kwargs.get("temperature") + if temperature is not None: + set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_TEMPERATURE, temperature) + + top_p = kwargs.get("top_p") + if top_p is not None: + set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_TOP_P, top_p) + res = yield f, args, kwargs with capture_internal_exceptions(): if should_send_default_pii() and integration.include_prompts: set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages) - set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") - set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) - set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") - set_data_normalized(span, SPANDATA.AI_STREAMING, streaming) + if hasattr(res, "model"): + set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, res.model) if hasattr(res, "choices"): if should_send_default_pii() and integration.include_prompts: - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - list(map(lambda x: x.message, res.choices)), - ) + response_text = [choice.message.dict() for choice in res.choices] + if len(response_text) > 0: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + safe_serialize(response_text), + ) + _calculate_token_usage(messages, res, span, None, integration.count_tokens) span.__exit__(None, None, None) elif hasattr(res, "_iterator"): diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py index dc66521c83..1525346726 100644 --- 
a/sentry_sdk/integrations/openai_agents/utils.py
+++ b/sentry_sdk/integrations/openai_agents/utils.py
@@ -1,16 +1,14 @@
-import json
 import sentry_sdk
 from sentry_sdk.consts import SPANDATA
 from sentry_sdk.integrations import DidNotEnable
 from sentry_sdk.scope import should_send_default_pii
-from sentry_sdk.utils import event_from_exception
+from sentry_sdk.utils import event_from_exception, safe_serialize

 from typing import TYPE_CHECKING

 if TYPE_CHECKING:
     from typing import Any
     from typing import Callable
-    from typing import Union

     from agents import Usage

 try:
@@ -162,49 +160,3 @@ def _set_output_data(span, result):
         span.set_data(
             SPANDATA.GEN_AI_RESPONSE_TEXT, safe_serialize(output_messages["response"])
         )
-
-
-def safe_serialize(data):
-    # type: (Any) -> str
-    """Safely serialize to a readable string."""
-
-    def serialize_item(item):
-        # type: (Any) -> Union[str, dict[Any, Any], list[Any], tuple[Any, ...]]
-        if callable(item):
-            try:
-                module = getattr(item, "__module__", None)
-                qualname = getattr(item, "__qualname__", None)
-                name = getattr(item, "__name__", "anonymous")
-
-                if module and qualname:
-                    full_path = f"{module}.{qualname}"
-                elif module and name:
-                    full_path = f"{module}.{name}"
-                else:
-                    full_path = name
-
-                return f"<function {full_path}>"
-            except Exception:
-                return f"<callable {name}>"
-        elif isinstance(item, dict):
-            return {k: serialize_item(v) for k, v in item.items()}
-        elif isinstance(item, (list, tuple)):
-            return [serialize_item(x) for x in item]
-        elif hasattr(item, "__dict__"):
-            try:
-                attrs = {
-                    k: serialize_item(v)
-                    for k, v in vars(item).items()
-                    if not k.startswith("_")
-                }
-                return f"<{type(item).__name__} {attrs}>"
-            except Exception:
-                return repr(item)
-        else:
-            return item
-
-    try:
-        serialized = serialize_item(data)
-        return json.dumps(serialized, default=str)
-    except Exception:
-        return str(data)
diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py
index 3b0ab8d746..9c6f2cfc3b 100644
--- a/sentry_sdk/utils.py
+++ b/sentry_sdk/utils.py
@@ -1938,3 +1938,49 @@ def try_convert(convert_func, value):
         return convert_func(value)
     except Exception:
         return None
+
+
+def safe_serialize(data):
+    # type: (Any) -> str
+    """Safely serialize to a readable string."""
+
+    def serialize_item(item):
+        # type: (Any) -> Union[str, dict[Any, Any], list[Any], tuple[Any, ...]]
+        if callable(item):
+            try:
+                module = getattr(item, "__module__", None)
+                qualname = getattr(item, "__qualname__", None)
+                name = getattr(item, "__name__", "anonymous")
+
+                if module and qualname:
+                    full_path = f"{module}.{qualname}"
+                elif module and name:
+                    full_path = f"{module}.{name}"
+                else:
+                    full_path = name
+
+                return f"<function {full_path}>"
+            except Exception:
+                return f"<callable {name}>"
+        elif isinstance(item, dict):
+            return {k: serialize_item(v) for k, v in item.items()}
+        elif isinstance(item, (list, tuple)):
+            return [serialize_item(x) for x in item]
+        elif hasattr(item, "__dict__"):
+            try:
+                attrs = {
+                    k: serialize_item(v)
+                    for k, v in vars(item).items()
+                    if not k.startswith("_")
+                }
+                return f"<{type(item).__name__} {attrs}>"
+            except Exception:
+                return repr(item)
+        else:
+            return item
+
+    try:
+        serialized = serialize_item(data)
+        return json.dumps(serialized, default=str)
+    except Exception:
+        return str(data)

From 2f04a2308e737515ba63f4b7401380d34b3604b2 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Wed, 23 Jul 2025 14:05:26 +0200
Subject: [PATCH 04/27] organize code

---
 sentry_sdk/integrations/openai.py | 243 ++++++++++++++++--------------
 1 file changed, 126 insertions(+), 117 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py
b/sentry_sdk/integrations/openai.py index a344039f38..07adecbb45 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -156,35 +156,19 @@ def _calculate_token_usage( ) -def _new_chat_completion_common(f, *args, **kwargs): - # type: (Any, Any, Any) -> Any - integration = sentry_sdk.get_client().get_integration(OpenAIIntegration) - if integration is None: - return f(*args, **kwargs) +def _set_request_data(span, kwargs, integration): + # type: (Span, dict[str, Any], Integration) -> None + messages = kwargs.get("messages") + if ( + messages is not None + and should_send_default_pii() + and integration.include_prompts + ): + set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages) - if "messages" not in kwargs: - # invalid call (in all versions of openai), let it return error - return f(*args, **kwargs) - - try: - iter(kwargs["messages"]) - except TypeError: - # invalid call (in all versions), messages must be iterable - return f(*args, **kwargs) - - kwargs["messages"] = list(kwargs["messages"]) - messages = kwargs["messages"] + # Common attributes model = kwargs.get("model") streaming = kwargs.get("stream") - - span = sentry_sdk.start_span( - op=consts.OP.GEN_AI_CHAT, - name=f"chat {model}", - origin=OpenAIIntegration.origin, - ) - span.__enter__() - - # Common attributes set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") @@ -209,106 +193,131 @@ def _new_chat_completion_common(f, *args, **kwargs): if top_p is not None: set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_TOP_P, top_p) - res = yield f, args, kwargs - with capture_internal_exceptions(): - if should_send_default_pii() and integration.include_prompts: - set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages) +def _set_response_data(span, res, kwargs, integration): + # type: (Span, Any, dict[str, Any], Integration) -> None + if hasattr(res, "model"): + set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, res.model) + + messages = kwargs.get("messages", []) - if hasattr(res, "model"): - set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, res.model) + if hasattr(res, "choices"): + if should_send_default_pii() and integration.include_prompts: + response_text = [choice.message.dict() for choice in res.choices] + if len(response_text) > 0: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + safe_serialize(response_text), + ) - if hasattr(res, "choices"): - if should_send_default_pii() and integration.include_prompts: - response_text = [choice.message.dict() for choice in res.choices] - if len(response_text) > 0: - set_data_normalized( + _calculate_token_usage(messages, res, span, None, integration.count_tokens) + + elif hasattr(res, "_iterator"): + data_buf: list[list[str]] = [] # one for each choice + + old_iterator = res._iterator + + def new_iterator(): + # type: () -> Iterator[ChatCompletionChunk] + with capture_internal_exceptions(): + for x in old_iterator: + if hasattr(x, "choices"): + choice_index = 0 + for choice in x.choices: + if hasattr(choice, "delta") and hasattr( + choice.delta, "content" + ): + content = choice.delta.content + if len(data_buf) <= choice_index: + data_buf.append([]) + data_buf[choice_index].append(content or "") + choice_index += 1 + yield x + if len(data_buf) > 0: + all_responses = list(map(lambda chunk: "".join(chunk), data_buf)) + if should_send_default_pii() and 
integration.include_prompts: + set_data_normalized( + span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses + ) + _calculate_token_usage( + messages, + res, span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - safe_serialize(response_text), + all_responses, + integration.count_tokens, ) - - _calculate_token_usage(messages, res, span, None, integration.count_tokens) span.__exit__(None, None, None) - elif hasattr(res, "_iterator"): - data_buf: list[list[str]] = [] # one for each choice - - old_iterator = res._iterator - - def new_iterator(): - # type: () -> Iterator[ChatCompletionChunk] - with capture_internal_exceptions(): - for x in old_iterator: - if hasattr(x, "choices"): - choice_index = 0 - for choice in x.choices: - if hasattr(choice, "delta") and hasattr( - choice.delta, "content" - ): - content = choice.delta.content - if len(data_buf) <= choice_index: - data_buf.append([]) - data_buf[choice_index].append(content or "") - choice_index += 1 - yield x - if len(data_buf) > 0: - all_responses = list( - map(lambda chunk: "".join(chunk), data_buf) - ) - if should_send_default_pii() and integration.include_prompts: - set_data_normalized( - span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses - ) - _calculate_token_usage( - messages, - res, - span, - all_responses, - integration.count_tokens, - ) - span.__exit__(None, None, None) - - async def new_iterator_async(): - # type: () -> AsyncIterator[ChatCompletionChunk] - with capture_internal_exceptions(): - async for x in old_iterator: - if hasattr(x, "choices"): - choice_index = 0 - for choice in x.choices: - if hasattr(choice, "delta") and hasattr( - choice.delta, "content" - ): - content = choice.delta.content - if len(data_buf) <= choice_index: - data_buf.append([]) - data_buf[choice_index].append(content or "") - choice_index += 1 - yield x - if len(data_buf) > 0: - all_responses = list( - map(lambda chunk: "".join(chunk), data_buf) - ) - if should_send_default_pii() and integration.include_prompts: - set_data_normalized( - span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses - ) - _calculate_token_usage( - messages, - res, - span, - all_responses, - integration.count_tokens, - ) - span.__exit__(None, None, None) - if str(type(res._iterator)) == "": - res._iterator = new_iterator_async() - else: - res._iterator = new_iterator() + async def new_iterator_async(): + # type: () -> AsyncIterator[ChatCompletionChunk] + with capture_internal_exceptions(): + async for x in old_iterator: + if hasattr(x, "choices"): + choice_index = 0 + for choice in x.choices: + if hasattr(choice, "delta") and hasattr( + choice.delta, "content" + ): + content = choice.delta.content + if len(data_buf) <= choice_index: + data_buf.append([]) + data_buf[choice_index].append(content or "") + choice_index += 1 + yield x + if len(data_buf) > 0: + all_responses = list(map(lambda chunk: "".join(chunk), data_buf)) + if should_send_default_pii() and integration.include_prompts: + set_data_normalized( + span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses + ) + _calculate_token_usage( + messages, + res, + span, + all_responses, + integration.count_tokens, + ) + span.__exit__(None, None, None) + if str(type(res._iterator)) == "": + res._iterator = new_iterator_async() else: - set_data_normalized(span, "unknown_response", True) - span.__exit__(None, None, None) + res._iterator = new_iterator() + + else: + set_data_normalized(span, "unknown_response", True) + + +def _new_chat_completion_common(f, *args, **kwargs): + # type: (Any, Any, Any) -> Any + integration = 
sentry_sdk.get_client().get_integration(OpenAIIntegration) + if integration is None: + return f(*args, **kwargs) + + if "messages" not in kwargs: + # invalid call (in all versions of openai), let it return error + return f(*args, **kwargs) + + try: + iter(kwargs["messages"]) + except TypeError: + # invalid call (in all versions), messages must be iterable + return f(*args, **kwargs) + + model = kwargs.get("model") + + with sentry_sdk.start_span( + op=consts.OP.GEN_AI_CHAT, + name=f"chat {model}", + origin=OpenAIIntegration.origin, + ) as span: + _set_request_data(span, kwargs, integration) + + res = yield f, args, kwargs + + _set_response_data(span, res, kwargs, integration) + return res From 6b2f5de42df9b720a8eac3e14c1b72f59a3e8318 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 23 Jul 2025 16:06:58 +0200 Subject: [PATCH 05/27] refactor embeddings --- sentry_sdk/integrations/openai.py | 104 +++++++++++------------------- 1 file changed, 39 insertions(+), 65 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 07adecbb45..af1795e3b8 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -156,11 +156,18 @@ def _calculate_token_usage( ) -def _set_request_data(span, kwargs, integration): - # type: (Span, dict[str, Any], Integration) -> None +def _set_request_data(span, kwargs, operation, integration): + # type: (Span, dict[str, Any], str, Integration) -> None messages = kwargs.get("messages") + if messages is None: + messages = kwargs.get("input") + + if isinstance(messages, str): + messages = [messages] + if ( messages is not None + and len(messages) > 0 and should_send_default_pii() and integration.include_prompts ): @@ -171,7 +178,7 @@ def _set_request_data(span, kwargs, integration): streaming = kwargs.get("stream") set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) - set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation) set_data_normalized(span, SPANDATA.AI_STREAMING, streaming) # Optional attributes @@ -194,16 +201,21 @@ def _set_request_data(span, kwargs, integration): set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_TOP_P, top_p) -def _set_response_data(span, res, kwargs, integration): +def _set_response_data(span, response, kwargs, integration): # type: (Span, Any, dict[str, Any], Integration) -> None - if hasattr(res, "model"): - set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, res.model) + if hasattr(response, "model"): + set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, response.model) + + messages = kwargs.get("messages") + if messages is None: + messages = kwargs.get("input") - messages = kwargs.get("messages", []) + if isinstance(messages, str): + messages = [messages] - if hasattr(res, "choices"): + if hasattr(response, "choices"): if should_send_default_pii() and integration.include_prompts: - response_text = [choice.message.dict() for choice in res.choices] + response_text = [choice.message.dict() for choice in response.choices] if len(response_text) > 0: set_data_normalized( span, @@ -211,12 +223,10 @@ def _set_response_data(span, res, kwargs, integration): safe_serialize(response_text), ) - _calculate_token_usage(messages, res, span, None, integration.count_tokens) - - elif hasattr(res, "_iterator"): + elif hasattr(response, "_iterator"): data_buf: list[list[str]] = [] # one for each choice - old_iterator = 
res._iterator + old_iterator = response._iterator def new_iterator(): # type: () -> Iterator[ChatCompletionChunk] @@ -242,7 +252,7 @@ def new_iterator(): ) _calculate_token_usage( messages, - res, + response, span, all_responses, integration.count_tokens, @@ -273,20 +283,19 @@ async def new_iterator_async(): ) _calculate_token_usage( messages, - res, + response, span, all_responses, integration.count_tokens, ) span.__exit__(None, None, None) - if str(type(res._iterator)) == "": - res._iterator = new_iterator_async() + if str(type(response._iterator)) == "": + response._iterator = new_iterator_async() else: - res._iterator = new_iterator() + response._iterator = new_iterator() - else: - set_data_normalized(span, "unknown_response", True) + _calculate_token_usage(messages, response, span, None, integration.count_tokens) def _new_chat_completion_common(f, *args, **kwargs): @@ -306,19 +315,20 @@ def _new_chat_completion_common(f, *args, **kwargs): return f(*args, **kwargs) model = kwargs.get("model") + operation = "chat" with sentry_sdk.start_span( op=consts.OP.GEN_AI_CHAT, - name=f"chat {model}", + name=f"{operation} {model}", origin=OpenAIIntegration.origin, ) as span: - _set_request_data(span, kwargs, integration) + _set_request_data(span, kwargs, operation, integration) - res = yield f, args, kwargs + response = yield f, args, kwargs - _set_response_data(span, res, kwargs, integration) + _set_response_data(span, response, kwargs, integration) - return res + return response def _wrap_chat_completion_create(f): @@ -398,54 +408,18 @@ def _new_embeddings_create_common(f, *args, **kwargs): return f(*args, **kwargs) model = kwargs.get("model") + operation = "embeddings" with sentry_sdk.start_span( op=consts.OP.GEN_AI_EMBEDDINGS, - name=f"embeddings {model}", + name=f"{operation} {model}", origin=OpenAIIntegration.origin, ) as span: - set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") - set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) - set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "embeddings") - - if "input" in kwargs and ( - should_send_default_pii() and integration.include_prompts - ): - if isinstance(kwargs["input"], str): - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_MESSAGES, [kwargs["input"]] - ) - elif ( - isinstance(kwargs["input"], list) - and len(kwargs["input"]) > 0 - and isinstance(kwargs["input"][0], str) - ): - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_MESSAGES, kwargs["input"] - ) + _set_request_data(span, kwargs, operation, integration) response = yield f, args, kwargs - input_tokens = 0 - total_tokens = 0 - if hasattr(response, "usage"): - if hasattr(response.usage, "prompt_tokens") and isinstance( - response.usage.prompt_tokens, int - ): - input_tokens = response.usage.prompt_tokens - if hasattr(response.usage, "total_tokens") and isinstance( - response.usage.total_tokens, int - ): - total_tokens = response.usage.total_tokens - - if input_tokens == 0: - input_tokens = integration.count_tokens(kwargs["input"] or "") - - record_token_usage( - span, - input_tokens=input_tokens, - total_tokens=total_tokens or input_tokens, - ) + _set_response_data(span, response, kwargs, integration) return response From ac4f406973d0806865123841ce147d7b3979d4c3 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 23 Jul 2025 16:35:17 +0200 Subject: [PATCH 06/27] cleanup respones --- sentry_sdk/integrations/openai.py | 46 ++++++++++++------------------- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git 
a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index af1795e3b8..9c7eea2af6 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -1,5 +1,4 @@ from functools import wraps -import json import sentry_sdk from sentry_sdk import consts @@ -223,6 +222,16 @@ def _set_response_data(span, response, kwargs, integration): safe_serialize(response_text), ) + elif hasattr(response, "output"): + if should_send_default_pii() and integration.include_prompts: + response_text = [item.to_dict() for item in response.output] + if len(response_text) > 0: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + safe_serialize(response_text), + ) + elif hasattr(response, "_iterator"): data_buf: list[list[str]] = [] # one for each choice @@ -499,39 +508,20 @@ def _new_responses_create_common(f, *args, **kwargs): return f(*args, **kwargs) model = kwargs.get("model") - input = kwargs.get("input") + operation = "responses" - span = sentry_sdk.start_span( + with sentry_sdk.start_span( op=consts.OP.GEN_AI_RESPONSES, - name=f"responses {model}", + name=f"{operation} {model}", origin=OpenAIIntegration.origin, - ) - span.__enter__() - - set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") - set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) - set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") - - if should_send_default_pii() and integration.include_prompts: - set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, input) - - res = yield f, args, kwargs - - if hasattr(res, "output"): - if should_send_default_pii() and integration.include_prompts: - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - json.dumps([item.to_dict() for item in res.output]), - ) - _calculate_token_usage([], res, span, None, integration.count_tokens) + ) as span: + _set_request_data(span, kwargs, operation, integration) - else: - set_data_normalized(span, "unknown_response", True) + response = yield f, args, kwargs - span.__exit__(None, None, None) + _set_response_data(span, response, kwargs, integration) - return res + return response def _wrap_responses_create(f): From b54b8dc658a6819b75f59d2192cd78de7ee9001d Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 23 Jul 2025 16:49:33 +0200 Subject: [PATCH 07/27] Add available tools --- sentry_sdk/integrations/openai.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 9c7eea2af6..17ba1831a1 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -191,6 +191,12 @@ def _set_request_data(span, kwargs, operation, integration): span, SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY, presence_penalty ) + frequency_penalty = kwargs.get("frequency_penalty") + if frequency_penalty is not None: + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY, frequency_penalty + ) + temperature = kwargs.get("temperature") if temperature is not None: set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_TEMPERATURE, temperature) @@ -199,6 +205,13 @@ def _set_request_data(span, kwargs, operation, integration): if top_p is not None: set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_TOP_P, top_p) + # Tools + tools = kwargs.get("tools", []) + if tools is not None and len(tools) > 0: + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, safe_serialize(tools) + ) + def _set_response_data(span, response, kwargs, integration): # type: (Span, Any, 
dict[str, Any], Integration) -> None From 1bbb77215c7aa9ee90e83fe28f33ed7c2e5f208b Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 23 Jul 2025 17:12:08 +0200 Subject: [PATCH 08/27] updated some tests --- sentry_sdk/integrations/openai.py | 6 ++++-- tests/integrations/openai/test_openai.py | 18 ++++++++++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 17ba1831a1..e5fc412ef2 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -174,13 +174,15 @@ def _set_request_data(span, kwargs, operation, integration): # Common attributes model = kwargs.get("model") - streaming = kwargs.get("stream") set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation) - set_data_normalized(span, SPANDATA.AI_STREAMING, streaming) # Optional attributes + streaming = kwargs.get("stream") + if streaming is not None: + set_data_normalized(span, SPANDATA.AI_STREAMING, streaming) + max_tokens = kwargs.get("max_tokens") if max_tokens is not None: set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, max_tokens) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index f6b18e6908..4291626319 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1,3 +1,4 @@ +import json import pytest from openai import AsyncOpenAI, OpenAI, AsyncStream, Stream, OpenAIError from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding @@ -52,7 +53,7 @@ async def __call__(self, *args, **kwargs): ) ], created=10000000, - model="model-id", + model="response-model-id", object="chat.completion", usage=CompletionUsage( completion_tokens=10, @@ -86,7 +87,7 @@ async def __call__(self, *args, **kwargs): tool_choice="none", tools=[], created_at=10000000, - model="model-id", + model="response-model-id", object="response", usage=ResponseUsage( input_tokens=20, @@ -143,7 +144,7 @@ def test_nonstreaming_chat_completion( assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]["content"] assert ( "the model response" - in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]["content"] + in json.loads(span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT])[0]["content"] ) else: assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] @@ -188,7 +189,7 @@ async def test_nonstreaming_chat_completion_async( assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]["content"] assert ( "the model response" - in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]["content"] + in json.loads(span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT])[0]["content"] ) else: assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] @@ -986,7 +987,10 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): assert spans[0]["op"] == "gen_ai.responses" assert spans[0]["origin"] == "auto.ai.openai" assert spans[0]["data"] == { + "gen_ai.operation.name": "responses", "gen_ai.request.model": "gpt-4o", + "gen_ai.response.model": "response-model-id", + "gen_ai.system": "openai", "gen_ai.usage.input_tokens": 20, "gen_ai.usage.input_tokens.cached": 5, "gen_ai.usage.output_tokens": 10, @@ -1026,8 +1030,11 @@ def test_ai_client_span_responses_api(sentry_init, capture_events): assert spans[0]["op"] == "gen_ai.responses" assert spans[0]["origin"] == "auto.ai.openai" assert spans[0]["data"] == { + 
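# With PII enabled, the expected attributes below include the request messages
# (and, further down, the response text) in addition to the common gen_ai.*
# metadata and the token usage taken from the mocked ResponseUsage: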
"gen_ai.operation.name": "responses", "gen_ai.request.messages": "How do I check if a Python object is an instance of a class?", "gen_ai.request.model": "gpt-4o", + "gen_ai.system": "openai", + "gen_ai.response.model": "response-model-id", "gen_ai.usage.input_tokens": 20, "gen_ai.usage.input_tokens.cached": 5, "gen_ai.usage.output_tokens": 10, @@ -1103,8 +1110,11 @@ async def test_ai_client_span_responses_async_api(sentry_init, capture_events): assert spans[0]["op"] == "gen_ai.responses" assert spans[0]["origin"] == "auto.ai.openai" assert spans[0]["data"] == { + "gen_ai.operation.name": "responses", "gen_ai.request.messages": "How do I check if a Python object is an instance of a class?", "gen_ai.request.model": "gpt-4o", + "gen_ai.response.model": "response-model-id", + "gen_ai.system": "openai", "gen_ai.usage.input_tokens": 20, "gen_ai.usage.input_tokens.cached": 5, "gen_ai.usage.output_tokens": 10, From c80a4137d5c11db320d189fc8f4508b3bc7cf33f Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 24 Jul 2025 09:18:37 +0200 Subject: [PATCH 09/27] fixed tests --- sentry_sdk/integrations/openai.py | 40 +++++++++++++------- tests/integrations/openai/test_openai.py | 47 ++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 14 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index e5fc412ef2..812a41e17a 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -155,6 +155,7 @@ def _calculate_token_usage( ) +# TODO: rename to _set_input_data and _set_output_data def _set_request_data(span, kwargs, operation, integration): # type: (Span, dict[str, Any], str, Integration) -> None messages = kwargs.get("messages") @@ -172,6 +173,7 @@ def _set_request_data(span, kwargs, operation, integration): ): set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages) + # TODO: make mapping and loop over kwargs to set attributes # Common attributes model = kwargs.get("model") set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") @@ -215,8 +217,8 @@ def _set_request_data(span, kwargs, operation, integration): ) -def _set_response_data(span, response, kwargs, integration): - # type: (Span, Any, dict[str, Any], Integration) -> None +def _set_response_data(span, response, kwargs, integration, finish_span=True): + # type: (Span, Any, dict[str, Any], Integration, bool) -> None if hasattr(response, "model"): set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, response.model) @@ -236,6 +238,7 @@ def _set_response_data(span, response, kwargs, integration): SPANDATA.GEN_AI_RESPONSE_TEXT, safe_serialize(response_text), ) + span.__exit__(None, None, None) elif hasattr(response, "output"): if should_send_default_pii() and integration.include_prompts: @@ -318,6 +321,10 @@ async def new_iterator_async(): response._iterator = new_iterator_async() else: response._iterator = new_iterator() + else: + set_data_normalized(span, "unknown_response", True) + if finish_span: + span.__exit__(None, None, None) _calculate_token_usage(messages, response, span, None, integration.count_tokens) @@ -341,16 +348,18 @@ def _new_chat_completion_common(f, *args, **kwargs): model = kwargs.get("model") operation = "chat" - with sentry_sdk.start_span( + span = sentry_sdk.start_span( op=consts.OP.GEN_AI_CHAT, name=f"{operation} {model}", origin=OpenAIIntegration.origin, - ) as span: - _set_request_data(span, kwargs, operation, integration) + ) + span.__enter__() - response = yield f, args, kwargs + _set_request_data(span, kwargs, operation, 
integration) - _set_response_data(span, response, kwargs, integration) + response = yield f, args, kwargs + + _set_response_data(span, response, kwargs, integration) return response @@ -443,7 +452,7 @@ def _new_embeddings_create_common(f, *args, **kwargs): response = yield f, args, kwargs - _set_response_data(span, response, kwargs, integration) + _set_response_data(span, response, kwargs, integration, finish_span=False) return response @@ -525,18 +534,21 @@ def _new_responses_create_common(f, *args, **kwargs): model = kwargs.get("model") operation = "responses" - with sentry_sdk.start_span( + span = sentry_sdk.start_span( op=consts.OP.GEN_AI_RESPONSES, name=f"{operation} {model}", origin=OpenAIIntegration.origin, - ) as span: - _set_request_data(span, kwargs, operation, integration) + ) + span.__enter__() + _set_request_data(span, kwargs, operation, integration) - response = yield f, args, kwargs + response = yield f, args, kwargs - _set_response_data(span, response, kwargs, integration) + _set_response_data(span, response, kwargs, integration) - return response + span.__exit__(None, None, None) + + return response def _wrap_responses_create(f): diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 4291626319..a08e319bc1 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1126,6 +1126,53 @@ async def test_ai_client_span_responses_async_api(sentry_init, capture_events): } +@pytest.mark.asyncio +@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") +async def test_ai_client_span_streaming_responses_async_api( + sentry_init, capture_events +): + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = AsyncOpenAI(api_key="z") + client.responses._post = AsyncMock(return_value=EXAMPLE_RESPONSE) + + with start_transaction(name="openai tx"): + await client.responses.create( + model="gpt-4o", + instructions="You are a coding assistant that talks like a pirate.", + input="How do I check if a Python object is an instance of a class?", + stream=True, + ) + + (transaction,) = events + spans = transaction["spans"] + + assert len(spans) == 1 + assert spans[0]["op"] == "gen_ai.responses" + assert spans[0]["origin"] == "auto.ai.openai" + assert spans[0]["data"] == { + "ai.streaming": True, + "gen_ai.operation.name": "responses", + "gen_ai.request.messages": "How do I check if a Python object is an instance of a class?", + "gen_ai.request.model": "gpt-4o", + "gen_ai.response.model": "response-model-id", + "gen_ai.system": "openai", + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "gen_ai.response.text": '[{"id": "message-id", "content": [{"annotations": [], "text": "the model response", "type": "output_text"}], "role": "assistant", "status": "completed", "type": "message"}]', + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + @pytest.mark.asyncio @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_error_in_responses_async_api(sentry_init, capture_events): From 3ad5e509d355658288c6349ba172168efd6be03f Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 24 Jul 2025 09:29:11 +0200 Subject: [PATCH 10/27] cleanup --- sentry_sdk/integrations/openai.py | 83 
+++++++++++++------------------ 1 file changed, 35 insertions(+), 48 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 812a41e17a..ed22dd51a0 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -156,8 +156,9 @@ def _calculate_token_usage( # TODO: rename to _set_input_data and _set_output_data -def _set_request_data(span, kwargs, operation, integration): +def _set_input_data(span, kwargs, operation, integration): # type: (Span, dict[str, Any], str, Integration) -> None + # Input messages (the prompt or data sent to the model) messages = kwargs.get("messages") if messages is None: messages = kwargs.get("input") @@ -173,43 +174,26 @@ def _set_request_data(span, kwargs, operation, integration): ): set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages) - # TODO: make mapping and loop over kwargs to set attributes - # Common attributes - model = kwargs.get("model") + # Input attributes: Common set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") - set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation) - # Optional attributes - streaming = kwargs.get("stream") - if streaming is not None: - set_data_normalized(span, SPANDATA.AI_STREAMING, streaming) - - max_tokens = kwargs.get("max_tokens") - if max_tokens is not None: - set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, max_tokens) - - presence_penalty = kwargs.get("presence_penalty") - if presence_penalty is not None: - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY, presence_penalty - ) - - frequency_penalty = kwargs.get("frequency_penalty") - if frequency_penalty is not None: - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY, frequency_penalty - ) - - temperature = kwargs.get("temperature") - if temperature is not None: - set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_TEMPERATURE, temperature) - - top_p = kwargs.get("top_p") - if top_p is not None: - set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_TOP_P, top_p) - - # Tools + # Input attributes: Optional + kwargs_keys_to_attributes = { + "model": SPANDATA.GEN_AI_REQUEST_MODEL, + "stream": SPANDATA.AI_STREAMING, + "max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, + "presence_penalty": SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY, + "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY, + "temperature": SPANDATA.GEN_AI_REQUEST_TEMPERATURE, + "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P, + } + for key, attribute in kwargs_keys_to_attributes.items(): + value = kwargs.get(key) + if value is not None: + set_data_normalized(span, attribute, value) + + # Input attributes: Tools tools = kwargs.get("tools", []) if tools is not None and len(tools) > 0: set_data_normalized( @@ -217,7 +201,7 @@ def _set_request_data(span, kwargs, operation, integration): ) -def _set_response_data(span, response, kwargs, integration, finish_span=True): +def _set_output_data(span, response, kwargs, integration, finish_span=True): # type: (Span, Any, dict[str, Any], Integration, bool) -> None if hasattr(response, "model"): set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, response.model) @@ -238,7 +222,8 @@ def _set_response_data(span, response, kwargs, integration, finish_span=True): SPANDATA.GEN_AI_RESPONSE_TEXT, safe_serialize(response_text), ) - span.__exit__(None, None, None) + if finish_span: + span.__exit__(None, None, None) elif hasattr(response, "output"): if 
should_send_default_pii() and integration.include_prompts: @@ -249,6 +234,8 @@ def _set_response_data(span, response, kwargs, integration, finish_span=True): SPANDATA.GEN_AI_RESPONSE_TEXT, safe_serialize(response_text), ) + if finish_span: + span.__exit__(None, None, None) elif hasattr(response, "_iterator"): data_buf: list[list[str]] = [] # one for each choice @@ -284,7 +271,8 @@ def new_iterator(): all_responses, integration.count_tokens, ) - span.__exit__(None, None, None) + if finish_span: + span.__exit__(None, None, None) async def new_iterator_async(): # type: () -> AsyncIterator[ChatCompletionChunk] @@ -315,14 +303,14 @@ async def new_iterator_async(): all_responses, integration.count_tokens, ) - span.__exit__(None, None, None) + if finish_span: + span.__exit__(None, None, None) if str(type(response._iterator)) == "": response._iterator = new_iterator_async() else: response._iterator = new_iterator() else: - set_data_normalized(span, "unknown_response", True) if finish_span: span.__exit__(None, None, None) @@ -355,11 +343,11 @@ def _new_chat_completion_common(f, *args, **kwargs): ) span.__enter__() - _set_request_data(span, kwargs, operation, integration) + _set_input_data(span, kwargs, operation, integration) response = yield f, args, kwargs - _set_response_data(span, response, kwargs, integration) + _set_output_data(span, response, kwargs, integration, finish_span=True) return response @@ -448,11 +436,11 @@ def _new_embeddings_create_common(f, *args, **kwargs): name=f"{operation} {model}", origin=OpenAIIntegration.origin, ) as span: - _set_request_data(span, kwargs, operation, integration) + _set_input_data(span, kwargs, operation, integration) response = yield f, args, kwargs - _set_response_data(span, response, kwargs, integration, finish_span=False) + _set_output_data(span, response, kwargs, integration, finish_span=False) return response @@ -540,13 +528,12 @@ def _new_responses_create_common(f, *args, **kwargs): origin=OpenAIIntegration.origin, ) span.__enter__() - _set_request_data(span, kwargs, operation, integration) - response = yield f, args, kwargs + _set_input_data(span, kwargs, operation, integration) - _set_response_data(span, response, kwargs, integration) + response = yield f, args, kwargs - span.__exit__(None, None, None) + _set_output_data(span, response, kwargs, integration, finish_span=True) return response From dddaab2c974fba62d1543780d65855f6f27bc6ca Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 24 Jul 2025 10:21:07 +0200 Subject: [PATCH 11/27] fix token count for streaming responses api --- sentry_sdk/integrations/openai.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index ed22dd51a0..319790c11f 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -126,8 +126,10 @@ def _calculate_token_usage( # Manually count tokens if input_tokens == 0: for message in messages: - if "content" in message: + if isinstance(message, dict) and "content" in message: input_tokens += count_tokens(message["content"]) + elif isinstance(message, str): + input_tokens += count_tokens(message) if output_tokens == 0: if streaming_message_responses is not None: @@ -246,6 +248,7 @@ def new_iterator(): # type: () -> Iterator[ChatCompletionChunk] with capture_internal_exceptions(): for x in old_iterator: + # OpenAI chat completion API if hasattr(x, "choices"): choice_index = 0 for choice in x.choices: @@ -257,6 +260,11 @@ def new_iterator(): 
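# one chunk list per choice: grow data_buf lazily as new choice indices
# appear in the stream, so each list can later be joined into that
# choice's full response text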
data_buf.append([]) data_buf[choice_index].append(content or "") choice_index += 1 + # OpenAI responses API + elif hasattr(x, "delta"): + if len(data_buf) == 0: + data_buf.append([]) + data_buf[0].append(x.delta or "") yield x if len(data_buf) > 0: all_responses = list(map(lambda chunk: "".join(chunk), data_buf)) From a83fca9c666d1198433fca0110d3736656c40c73 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 24 Jul 2025 11:06:49 +0200 Subject: [PATCH 12/27] fixed streaming responses token count --- sentry_sdk/integrations/openai.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 319790c11f..c55a095aaa 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -286,6 +286,7 @@ async def new_iterator_async(): # type: () -> AsyncIterator[ChatCompletionChunk] with capture_internal_exceptions(): async for x in old_iterator: + # OpenAI chat completion API if hasattr(x, "choices"): choice_index = 0 for choice in x.choices: @@ -297,6 +298,11 @@ async def new_iterator_async(): data_buf.append([]) data_buf[choice_index].append(content or "") choice_index += 1 + # OpenAI responses API + elif hasattr(x, "delta"): + if len(data_buf) == 0: + data_buf.append([]) + data_buf[0].append(x.delta or "") yield x if len(data_buf) > 0: all_responses = list(map(lambda chunk: "".join(chunk), data_buf)) From c589030da801cabf4ce9244ff9aa768202399890 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Fri, 25 Jul 2025 09:30:16 +0200 Subject: [PATCH 13/27] typing --- sentry_sdk/integrations/openai.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index c55a095aaa..c1efd9e485 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -157,9 +157,8 @@ def _calculate_token_usage( ) -# TODO: rename to _set_input_data and _set_output_data def _set_input_data(span, kwargs, operation, integration): - # type: (Span, dict[str, Any], str, Integration) -> None + # type: (Span, dict[str, Any], str, OpenAIIntegration) -> None # Input messages (the prompt or data sent to the model) messages = kwargs.get("messages") if messages is None: @@ -204,7 +203,7 @@ def _set_input_data(span, kwargs, operation, integration): def _set_output_data(span, response, kwargs, integration, finish_span=True): - # type: (Span, Any, dict[str, Any], Integration, bool) -> None + # type: (Span, Any, dict[str, Any], OpenAIIntegration, bool) -> None if hasattr(response, "model"): set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, response.model) From 45300bff3f72df292d5f91dc98c3f9f6e78fd2cf Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Fri, 25 Jul 2025 09:43:16 +0200 Subject: [PATCH 14/27] typing --- sentry_sdk/integrations/openai.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index c1efd9e485..293b2ea864 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -160,8 +160,8 @@ def _calculate_token_usage( def _set_input_data(span, kwargs, operation, integration): # type: (Span, dict[str, Any], str, OpenAIIntegration) -> None # Input messages (the prompt or data sent to the model) - messages = kwargs.get("messages") - if messages is None: + messages = kwargs.get("messages", []) + if messages == []: messages = kwargs.get("input") if isinstance(messages, str): @@ -207,8 +207,10 @@ 
def _set_output_data(span, response, kwargs, integration, finish_span=True): if hasattr(response, "model"): set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, response.model) - messages = kwargs.get("messages") - if messages is None: + # Input messages (the prompt or data sent to the model) + # used for the token usage calculation + messages = kwargs.get("messages", []) + if messages == []: messages = kwargs.get("input") if isinstance(messages, str): From b5dd11578b7b9af1c15fa0c5e71316433e08ce03 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Fri, 25 Jul 2025 14:53:08 +0200 Subject: [PATCH 15/27] More tests --- sentry_sdk/integrations/openai.py | 61 ++++++-- tests/integrations/openai/test_openai.py | 180 +++++++++++++++++++++++ 2 files changed, 227 insertions(+), 14 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 293b2ea864..f234f67a6c 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -248,6 +248,7 @@ def _set_output_data(span, response, kwargs, integration, finish_span=True): def new_iterator(): # type: () -> Iterator[ChatCompletionChunk] with capture_internal_exceptions(): + count_tokens_manually = True for x in old_iterator: # OpenAI chat completion API if hasattr(x, "choices"): @@ -261,31 +262,48 @@ def new_iterator(): data_buf.append([]) data_buf[choice_index].append(content or "") choice_index += 1 + # OpenAI responses API elif hasattr(x, "delta"): if len(data_buf) == 0: data_buf.append([]) data_buf[0].append(x.delta or "") + + # OpenAI responses API end of streaming response + if x.__class__.__name__ == "ResponseCompletedEvent": + _calculate_token_usage( + messages, + x.response, + span, + None, + integration.count_tokens, + ) + count_tokens_manually = False + yield x + if len(data_buf) > 0: all_responses = list(map(lambda chunk: "".join(chunk), data_buf)) if should_send_default_pii() and integration.include_prompts: set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses ) - _calculate_token_usage( - messages, - response, - span, - all_responses, - integration.count_tokens, - ) + if count_tokens_manually: + _calculate_token_usage( + messages, + response, + span, + all_responses, + integration.count_tokens, + ) + if finish_span: span.__exit__(None, None, None) async def new_iterator_async(): # type: () -> AsyncIterator[ChatCompletionChunk] with capture_internal_exceptions(): + count_tokens_manually = True async for x in old_iterator: # OpenAI chat completion API if hasattr(x, "choices"): @@ -299,25 +317,40 @@ async def new_iterator_async(): data_buf.append([]) data_buf[choice_index].append(content or "") choice_index += 1 + # OpenAI responses API elif hasattr(x, "delta"): if len(data_buf) == 0: data_buf.append([]) data_buf[0].append(x.delta or "") + + # OpenAI responses API end of streaming response + if x.__class__.__name__ == "ResponseCompletedEvent": + _calculate_token_usage( + messages, + x.response, + span, + None, + integration.count_tokens, + ) + count_tokens_manually = False + yield x + if len(data_buf) > 0: all_responses = list(map(lambda chunk: "".join(chunk), data_buf)) if should_send_default_pii() and integration.include_prompts: set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses ) - _calculate_token_usage( - messages, - response, - span, - all_responses, - integration.count_tokens, - ) + if count_tokens_manually: + _calculate_token_usage( + messages, + response, + span, + all_responses, + integration.count_tokens, + ) if 
finish_span: span.__exit__(None, None, None) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index a08e319bc1..45d60d22d8 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -6,6 +6,9 @@ from openai.types.chat.chat_completion import Choice from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage +from openai.types.responses.response_text_delta_event import ResponseTextDeltaEvent +from openai.types.responses.response_created_event import ResponseCreatedEvent +from openai.types.responses.response_completed_event import ResponseCompletedEvent SKIP_RESPONSES_TESTS = False @@ -1209,3 +1212,180 @@ async def test_error_in_responses_async_api(sentry_init, capture_events): error_event["contexts"]["trace"]["trace_id"] == transaction_event["contexts"]["trace"]["trace_id"] ) + + +EXAMPLE_RESPONSES_STREAM = [ + ResponseCreatedEvent( + sequence_number=1, + type="response.created", + response=Response( + id="chat-id", + created_at=10000000, + model="response-model-id", + object="response", + output=[], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + ), + ), + ResponseTextDeltaEvent( + item_id="msg_1", + sequence_number=2, + type="response.output_text.delta", + logprobs=[], + content_index=0, + output_index=0, + delta="hel", + ), + ResponseTextDeltaEvent( + item_id="msg_1", + sequence_number=3, + type="response.output_text.delta", + logprobs=[], + content_index=0, + output_index=0, + delta="lo ", + ), + ResponseTextDeltaEvent( + item_id="msg_1", + sequence_number=4, + type="response.output_text.delta", + logprobs=[], + content_index=0, + output_index=0, + delta="world", + ), + ResponseCompletedEvent( + sequence_number=5, + type="response.completed", + response=Response( + id="chat-id", + created_at=10000000, + model="response-model-id", + object="response", + output=[], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + usage=ResponseUsage( + input_tokens=20, + input_tokens_details=InputTokensDetails( + cached_tokens=5, + ), + output_tokens=10, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=8, + ), + total_tokens=30, + ), + ), + ), +] + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_streaming_responses_api( + sentry_init, capture_events, send_default_pii, include_prompts +): + sentry_init( + integrations=[ + OpenAIIntegration( + include_prompts=include_prompts, + ) + ], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + client = OpenAI(api_key="z") + returned_stream = Stream(cast_to=None, response=None, client=client) + returned_stream._iterator = EXAMPLE_RESPONSES_STREAM + client.responses._post = mock.Mock(return_value=returned_stream) + + with start_transaction(name="openai tx"): + response_stream = client.responses.create( + model="some-model", + input="hello", + stream=True, + ) + + response_string = "" + for item in response_stream: + if hasattr(item, "delta"): + response_string += item.delta + + assert response_string == "hello world" + + (transaction,) = events + (span,) = transaction["spans"] + assert span["op"] == "gen_ai.responses" + + if send_default_pii and include_prompts: + assert span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == "hello" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == 
"hello world" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +async def test_streaming_responses_api_async( + sentry_init, capture_events, send_default_pii, include_prompts +): + sentry_init( + integrations=[ + OpenAIIntegration( + include_prompts=include_prompts, + ) + ], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + client = AsyncOpenAI(api_key="z") + returned_stream = AsyncStream(cast_to=None, response=None, client=client) + returned_stream._iterator = async_iterator(EXAMPLE_RESPONSES_STREAM) + client.responses._post = AsyncMock(return_value=returned_stream) + + with start_transaction(name="openai tx"): + response_stream = await client.responses.create( + model="some-model", + input="hello", + stream=True, + ) + + response_string = "" + async for item in response_stream: + if hasattr(item, "delta"): + response_string += item.delta + + assert response_string == "hello world" + + (transaction,) = events + (span,) = transaction["spans"] + assert span["op"] == "gen_ai.responses" + + if send_default_pii and include_prompts: + assert span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == "hello" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 From 2c5d1f09d09981438f86069f6fbe957d47d1dbe9 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Fri, 25 Jul 2025 15:00:59 +0200 Subject: [PATCH 16/27] import ordering --- tests/integrations/openai/test_openai.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 45d60d22d8..f028f4fca2 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -6,13 +6,13 @@ from openai.types.chat.chat_completion import Choice from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage -from openai.types.responses.response_text_delta_event import ResponseTextDeltaEvent -from openai.types.responses.response_created_event import ResponseCreatedEvent -from openai.types.responses.response_completed_event import ResponseCompletedEvent SKIP_RESPONSES_TESTS = False try: + from openai.types.responses.response_completed_event import ResponseCompletedEvent + from openai.types.responses.response_created_event import ResponseCreatedEvent + from openai.types.responses.response_text_delta_event import ResponseTextDeltaEvent from openai.types.responses.response_usage import ( InputTokensDetails, OutputTokensDetails, From f629b0d27627e6006a8f5f7c9ae42e6ff89ecc96 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Fri, 25 Jul 2025 15:08:57 +0200 Subject: [PATCH 17/27] tests --- tests/integrations/openai/test_openai.py | 131 ++++++++++++----------- 1 file 
From f629b0d27627e6006a8f5f7c9ae42e6ff89ecc96 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Fri, 25 Jul 2025 15:08:57 +0200
Subject: [PATCH 17/27] tests

---
 tests/integrations/openai/test_openai.py | 131 ++++++++++-----------
 1 file changed, 67 insertions(+), 64 deletions(-)

diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index f028f4fca2..b3626d4c43 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -1214,74 +1214,77 @@ async def test_error_in_responses_async_api(sentry_init, capture_events):
     )
 
 
-EXAMPLE_RESPONSES_STREAM = [
-    ResponseCreatedEvent(
-        sequence_number=1,
-        type="response.created",
-        response=Response(
-            id="chat-id",
-            created_at=10000000,
-            model="response-model-id",
-            object="response",
-            output=[],
-            parallel_tool_calls=False,
-            tool_choice="none",
-            tools=[],
+if SKIP_RESPONSES_TESTS:
+    EXAMPLE_RESPONSES_STREAM = []
+else:
+    EXAMPLE_RESPONSES_STREAM = [
+        ResponseCreatedEvent(
+            sequence_number=1,
+            type="response.created",
+            response=Response(
+                id="chat-id",
+                created_at=10000000,
+                model="response-model-id",
+                object="response",
+                output=[],
+                parallel_tool_calls=False,
+                tool_choice="none",
+                tools=[],
+            ),
         ),
-    ),
-    ResponseTextDeltaEvent(
-        item_id="msg_1",
-        sequence_number=2,
-        type="response.output_text.delta",
-        logprobs=[],
-        content_index=0,
-        output_index=0,
-        delta="hel",
-    ),
-    ResponseTextDeltaEvent(
-        item_id="msg_1",
-        sequence_number=3,
-        type="response.output_text.delta",
-        logprobs=[],
-        content_index=0,
-        output_index=0,
-        delta="lo ",
-    ),
-    ResponseTextDeltaEvent(
-        item_id="msg_1",
-        sequence_number=4,
-        type="response.output_text.delta",
-        logprobs=[],
-        content_index=0,
-        output_index=0,
-        delta="world",
-    ),
-    ResponseCompletedEvent(
-        sequence_number=5,
-        type="response.completed",
-        response=Response(
-            id="chat-id",
-            created_at=10000000,
-            model="response-model-id",
-            object="response",
-            output=[],
-            parallel_tool_calls=False,
-            tool_choice="none",
-            tools=[],
-            usage=ResponseUsage(
-                input_tokens=20,
-                input_tokens_details=InputTokensDetails(
-                    cached_tokens=5,
-                ),
-                output_tokens=10,
-                output_tokens_details=OutputTokensDetails(
-                    reasoning_tokens=8,
+        ResponseTextDeltaEvent(
+            item_id="msg_1",
+            sequence_number=2,
+            type="response.output_text.delta",
+            logprobs=[],
+            content_index=0,
+            output_index=0,
+            delta="hel",
+        ),
+        ResponseTextDeltaEvent(
+            item_id="msg_1",
+            sequence_number=3,
+            type="response.output_text.delta",
+            logprobs=[],
+            content_index=0,
+            output_index=0,
+            delta="lo ",
+        ),
+        ResponseTextDeltaEvent(
+            item_id="msg_1",
+            sequence_number=4,
+            type="response.output_text.delta",
+            logprobs=[],
+            content_index=0,
+            output_index=0,
+            delta="world",
+        ),
+        ResponseCompletedEvent(
+            sequence_number=5,
+            type="response.completed",
+            response=Response(
+                id="chat-id",
+                created_at=10000000,
+                model="response-model-id",
+                object="response",
+                output=[],
+                parallel_tool_calls=False,
+                tool_choice="none",
+                tools=[],
+                usage=ResponseUsage(
+                    input_tokens=20,
+                    input_tokens_details=InputTokensDetails(
+                        cached_tokens=5,
+                    ),
+                    output_tokens=10,
+                    output_tokens_details=OutputTokensDetails(
+                        reasoning_tokens=8,
+                    ),
+                    total_tokens=30,
                 ),
-                total_tokens=30,
             ),
         ),
-    ),
-]
+    ]

From d76a56341b60a87729e75b869ba47c512e3f09cf Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Fri, 25 Jul 2025 15:18:10 +0200
Subject: [PATCH 18/27] tests

---
 tests/integrations/openai/test_openai.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index b3626d4c43..fe282d0853 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -1291,6 +1291,7 @@ async def test_error_in_responses_async_api(sentry_init, capture_events):
     "send_default_pii, include_prompts",
     [(True, True), (True, False), (False, True), (False, False)],
 )
+@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")
 def test_streaming_responses_api(
     sentry_init, capture_events, send_default_pii, include_prompts
 ):
@@ -1345,6 +1346,7 @@ def test_streaming_responses_api(
     "send_default_pii, include_prompts",
     [(True, True), (True, False), (False, True), (False, False)],
 )
+@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")
 async def test_streaming_responses_api_async(
     sentry_init, capture_events, send_default_pii, include_prompts
 ):
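The guard introduced in patch 16 and consumed in patches 17 and 18 is easiest to read in one place; the net shape of the test module is roughly this abbreviated sketch (test body elided):

    import pytest

    SKIP_RESPONSES_TESTS = False

    try:
        # Only available in openai >= 1.66.0, where the Responses API was added.
        from openai.types.responses.response_completed_event import ResponseCompletedEvent
    except ImportError:
        SKIP_RESPONSES_TESTS = True

    @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")
    def test_streaming_responses_api():
        ...
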
From f9dfe5e7a6024e5c4a111bc2fb8788f544fe5ac6 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 28 Jul 2025 10:13:59 +0200
Subject: [PATCH 19/27] Rename pipeline name

---
 sentry_sdk/ai/monitoring.py | 6 +++---
 sentry_sdk/consts.py        | 7 +++++++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/sentry_sdk/ai/monitoring.py b/sentry_sdk/ai/monitoring.py
index 7a687736d0..e3f372c3ba 100644
--- a/sentry_sdk/ai/monitoring.py
+++ b/sentry_sdk/ai/monitoring.py
@@ -40,7 +40,7 @@ def sync_wrapped(*args, **kwargs):
                 for k, v in kwargs.pop("sentry_data", {}).items():
                     span.set_data(k, v)
                 if curr_pipeline:
-                    span.set_data(SPANDATA.AI_PIPELINE_NAME, curr_pipeline)
+                    span.set_data(SPANDATA.GEN_AI_PIPELINE_NAME, curr_pipeline)
                     return f(*args, **kwargs)
             else:
                 _ai_pipeline_name.set(description)
@@ -69,7 +69,7 @@ async def async_wrapped(*args, **kwargs):
                 for k, v in kwargs.pop("sentry_data", {}).items():
                     span.set_data(k, v)
                 if curr_pipeline:
-                    span.set_data(SPANDATA.AI_PIPELINE_NAME, curr_pipeline)
+                    span.set_data(SPANDATA.GEN_AI_PIPELINE_NAME, curr_pipeline)
                     return await f(*args, **kwargs)
             else:
                 _ai_pipeline_name.set(description)
@@ -108,7 +108,7 @@ def record_token_usage(
     # TODO: move pipeline name elsewhere
     ai_pipeline_name = get_ai_pipeline_name()
     if ai_pipeline_name:
-        span.set_data(SPANDATA.AI_PIPELINE_NAME, ai_pipeline_name)
+        span.set_data(SPANDATA.GEN_AI_PIPELINE_NAME, ai_pipeline_name)
 
     if input_tokens is not None:
         span.set_data(SPANDATA.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py
index 533aa9f815..9d8842ac9f 100644
--- a/sentry_sdk/consts.py
+++ b/sentry_sdk/consts.py
@@ -169,6 +169,7 @@ class SPANDATA:
     AI_PIPELINE_NAME = "ai.pipeline.name"
     """
     Name of the AI pipeline or chain being executed.
+    DEPRECATED: Use GEN_AI_PIPELINE_NAME instead.
     Example: "qa-pipeline"
     """
 
@@ -372,6 +373,12 @@ class SPANDATA:
     Example: "chat"
     """
 
+    GEN_AI_PIPELINE_NAME = "gen_ai.pipeline.name"
+    """
+    Name of the AI pipeline or chain being executed.
+    Example: "qa-pipeline"
+    """
+
     GEN_AI_RESPONSE_MODEL = "gen_ai.response.model"
     """
     Exact model identifier used to generate the response
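The renamed attribute is written by the SDK's ai_track decorator, which sets the current pipeline name for spans created underneath it. A minimal sketch of how gen_ai.pipeline.name ends up on child spans after this patch (the pipeline name and function are illustrative):

    import sentry_sdk
    from sentry_sdk.ai.monitoring import ai_track

    sentry_sdk.init(traces_sample_rate=1.0)

    @ai_track("qa-pipeline")
    def answer_question(question):
        # AI spans created in here (e.g. by the OpenAI integration) now carry
        # gen_ai.pipeline.name = "qa-pipeline"; ai.pipeline.name is deprecated.
        ...

    with sentry_sdk.start_transaction(name="qa"):
        answer_question("What is Sentry?")
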
From 14baf6cfc5c9efb4dbd39ca58932ea0d134f75d3 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 28 Jul 2025 10:15:36 +0200
Subject: [PATCH 20/27] Rename streaming attribute

---
 sentry_sdk/consts.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py
index 9d8842ac9f..a82ff94c49 100644
--- a/sentry_sdk/consts.py
+++ b/sentry_sdk/consts.py
@@ -230,6 +230,7 @@ class SPANDATA:
     AI_STREAMING = "ai.streaming"
     """
     Whether or not the AI model call's response was streamed back asynchronously
+    DEPRECATED: Use GEN_AI_RESPONSE_STREAMING instead.
     Example: true
     """
 
@@ -385,6 +386,12 @@ class SPANDATA:
     Example: gpt-4o-mini-2024-07-18
     """
 
+    GEN_AI_RESPONSE_STREAMING = "gen_ai.response.streaming"
+    """
+    Whether or not the AI model call's response was streamed back asynchronously
+    Example: true
+    """
+
     GEN_AI_RESPONSE_TEXT = "gen_ai.response.text"
     """
     The model's response text messages.

From f2cab2d1780f656c160cb4e1812faea6cad0484b Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 28 Jul 2025 12:09:10 +0200
Subject: [PATCH 21/27] Apply suggestions from code review

Co-authored-by: Ivana Kellyer
---
 sentry_sdk/integrations/openai.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index f234f67a6c..9520fdf16f 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -270,7 +270,7 @@ def new_iterator():
                         data_buf[0].append(x.delta or "")
 
                 # OpenAI responses API end of streaming response
-                if x.__class__.__name__ == "ResponseCompletedEvent":
+                if isinstance(x, ResponseCompletedEvent):
                     _calculate_token_usage(
                         messages,
                         x.response,
@@ -283,7 +283,7 @@ def new_iterator():
             yield x
 
         if len(data_buf) > 0:
-            all_responses = list(map(lambda chunk: "".join(chunk), data_buf))
+            all_responses = ["".join(chunk) for chunk in data_buf]
             if should_send_default_pii() and integration.include_prompts:
                 set_data_normalized(
                     span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
@@ -338,7 +338,7 @@ async def new_iterator_async():
             yield x
 
         if len(data_buf) > 0:
-            all_responses = list(map(lambda chunk: "".join(chunk), data_buf))
+            all_responses = ["".join(chunk) for chunk in data_buf]
             if should_send_default_pii() and integration.include_prompts:
                 set_data_normalized(
                     span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses

From d13a5d0d5bc4c5385051335a781b2d68b52d83b0 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 28 Jul 2025 12:10:52 +0200
Subject: [PATCH 22/27] review feedback

---
 sentry_sdk/integrations/openai.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 9520fdf16f..ab965ed706 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -32,6 +32,7 @@
 try:
     # responses API support was introduced in v1.66.0
     from openai.resources.responses import Responses, AsyncResponses
+    from openai.types.responses.response_completed_event import ResponseCompletedEvent
 except ImportError:
     RESPONSES_API_ENABLED = False
 
@@ -325,7 +326,7 @@ async def new_iterator_async():
                         data_buf[0].append(x.delta or "")
 
                 # OpenAI responses API end of streaming response
-                if x.__class__.__name__ == "ResponseCompletedEvent":
+                if isinstance(x, ResponseCompletedEvent):
                     _calculate_token_usage(
                         messages,
                         x.response,

From 157f95af0e2345bc42927d0f9a9441826050ebb6 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 28 Jul 2025 12:27:11 +0200
Subject: [PATCH 23/27] Review feedback

---
 sentry_sdk/integrations/openai.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index ab965ed706..9b43fac29a 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -100,7 +100,7 @@ def _get_usage(usage, names):
 def _calculate_token_usage(
     messages, response, span, streaming_message_responses, count_tokens
 ):
-    # type: (Iterable[ChatCompletionMessageParam], Any, Span, Optional[List[str]], Callable[..., Any]) -> None
+    # type: (Optional[Iterable[ChatCompletionMessageParam]], Any, Span, Optional[List[str]], Callable[..., Any]) -> None
     input_tokens = 0  # type: Optional[int]
     input_tokens_cached = 0  # type: Optional[int]
     output_tokens = 0  # type: Optional[int]
@@ -126,7 +126,7 @@ def _calculate_token_usage(
 
     # Manually count tokens
     if input_tokens == 0:
-        for message in messages:
+        for message in messages or []:
             if isinstance(message, dict) and "content" in message:
                 input_tokens += count_tokens(message["content"])
             elif isinstance(message, str):
@@ -161,8 +161,8 @@ def _calculate_token_usage(
 def _set_input_data(span, kwargs, operation, integration):
     # type: (Span, dict[str, Any], str, OpenAIIntegration) -> None
     # Input messages (the prompt or data sent to the model)
-    messages = kwargs.get("messages", [])
-    if messages == []:
+    messages = kwargs.get("messages")
+    if messages is None:
         messages = kwargs.get("input")
 
     if isinstance(messages, str):
@@ -196,7 +196,7 @@ def _set_input_data(span, kwargs, operation, integration):
         set_data_normalized(span, attribute, value)
 
     # Input attributes: Tools
-    tools = kwargs.get("tools", [])
+    tools = kwargs.get("tools")
     if tools is not None and len(tools) > 0:
         set_data_normalized(
             span, SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, safe_serialize(tools)
@@ -210,11 +210,11 @@ def _set_output_data(span, response, kwargs, integration, finish_span=True):
 
     # Input messages (the prompt or data sent to the model)
     # used for the token usage calculation
-    messages = kwargs.get("messages", [])
-    if messages == []:
+    messages = kwargs.get("messages")
+    if messages is None:
         messages = kwargs.get("input")
 
-    if isinstance(messages, str):
+    if messages is not None and isinstance(messages, str):
         messages = [messages]
 
     if hasattr(response, "choices"):
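Patch 23's None-handling is easier to see outside the diff; a condensed sketch of the normalization it converges on (the helper name is illustrative, not part of the patch):

    def _normalize_messages(kwargs):
        # Chat Completions passes "messages"; the Responses API passes "input".
        messages = kwargs.get("messages")
        if messages is None:
            messages = kwargs.get("input")
        # The Responses API accepts a bare string; wrap it so token counting
        # can always iterate.
        if isinstance(messages, str):
            messages = [messages]
        return messages  # may still be None; callers iterate over `messages or []`
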
From 0aa26ebf1adede7f3bc5b7e54c52f51af12f36d0 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 28 Jul 2025 12:47:12 +0200
Subject: [PATCH 24/27] resilience

---
 sentry_sdk/integrations/openai.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 9b43fac29a..72363590bb 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -271,7 +271,7 @@ def new_iterator():
                         data_buf[0].append(x.delta or "")
 
                 # OpenAI responses API end of streaming response
-                if isinstance(x, ResponseCompletedEvent):
+                if RESPONSES_API_ENABLED and isinstance(x, ResponseCompletedEvent):
                     _calculate_token_usage(
                         messages,
                         x.response,
@@ -326,7 +326,7 @@ async def new_iterator_async():
                         data_buf[0].append(x.delta or "")
 
                 # OpenAI responses API end of streaming response
-                if isinstance(x, ResponseCompletedEvent):
+                if RESPONSES_API_ENABLED and isinstance(x, ResponseCompletedEvent):
                     _calculate_token_usage(
                         messages,
                         x.response,
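Patches 22 and 24 combine into one defensive pattern for optionally available types; roughly (a sketch of the combined effect, not the full integration module):

    RESPONSES_API_ENABLED = True
    try:
        # Only present in openai >= 1.66.0.
        from openai.types.responses.response_completed_event import ResponseCompletedEvent
    except ImportError:
        RESPONSES_API_ENABLED = False

    def _is_responses_stream_end(event):
        # Checking the flag first keeps isinstance() from raising NameError on
        # older openai versions where the import above failed.
        return RESPONSES_API_ENABLED and isinstance(event, ResponseCompletedEvent)
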
From e04174e4d26b66621b61676da4d6632534adb43a Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 28 Jul 2025 12:56:32 +0200
Subject: [PATCH 25/27] streaming

---
 sentry_sdk/integrations/openai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index f234f67a6c..00875787bc 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -182,7 +182,7 @@ def _set_input_data(span, kwargs, operation, integration):
     # Input attributes: Optional
     kwargs_keys_to_attributes = {
         "model": SPANDATA.GEN_AI_REQUEST_MODEL,
-        "stream": SPANDATA.AI_STREAMING,
+        "stream": SPANDATA.GEN_AI_RESPONSE_STREAMING,
         "max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS,
         "presence_penalty": SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY,
         "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY,

From eb629de7fccd644044faa000261e5cad906a0e2f Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 28 Jul 2025 13:42:25 +0200
Subject: [PATCH 26/27] do not calculate token usage twice

---
 sentry_sdk/integrations/openai.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 72363590bb..98e83607dd 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -226,6 +226,7 @@ def _set_output_data(span, response, kwargs, integration, finish_span=True):
                     SPANDATA.GEN_AI_RESPONSE_TEXT,
                     safe_serialize(response_text),
                 )
+        _calculate_token_usage(messages, response, span, None, integration.count_tokens)
         if finish_span:
             span.__exit__(None, None, None)
@@ -238,6 +239,7 @@ def _set_output_data(span, response, kwargs, integration, finish_span=True):
                     SPANDATA.GEN_AI_RESPONSE_TEXT,
                     safe_serialize(response_text),
                 )
+        _calculate_token_usage(messages, response, span, None, integration.count_tokens)
         if finish_span:
             span.__exit__(None, None, None)
@@ -360,11 +362,10 @@ async def new_iterator_async():
         else:
             response._iterator = new_iterator()
     else:
+        _calculate_token_usage(messages, response, span, None, integration.count_tokens)
         if finish_span:
             span.__exit__(None, None, None)
 
-    _calculate_token_usage(messages, response, span, None, integration.count_tokens)
-
 
 def _new_chat_completion_common(f, *args, **kwargs):
     # type: (Any, Any, Any) -> Any

From 1f4ed0cd242628ebe51e37ead42d9f375e81d2d6 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 28 Jul 2025 13:54:09 +0200
Subject: [PATCH 27/27] updated test

---
 tests/integrations/openai/test_openai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index fe282d0853..dfac08d762 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -1159,11 +1159,11 @@ async def test_ai_client_span_streaming_responses_async_api(
     assert spans[0]["op"] == "gen_ai.responses"
     assert spans[0]["origin"] == "auto.ai.openai"
     assert spans[0]["data"] == {
-        "ai.streaming": True,
         "gen_ai.operation.name": "responses",
         "gen_ai.request.messages": "How do I check if a Python object is an instance of a class?",
         "gen_ai.request.model": "gpt-4o",
         "gen_ai.response.model": "response-model-id",
+        "gen_ai.response.streaming": True,
         "gen_ai.system": "openai",
         "gen_ai.usage.input_tokens": 20,
         "gen_ai.usage.input_tokens.cached": 5,