From f2edb389d0ff06671bccc5f05e25c365d4fe3048 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 23 Jul 2025 10:00:48 +0200 Subject: [PATCH 01/27] Fixed span names to be otel compatible --- sentry_sdk/integrations/openai.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index bf310e5fdc..993be09a31 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -180,7 +180,7 @@ def _new_chat_completion_common(f, *args, **kwargs): span = sentry_sdk.start_span( op=consts.OP.GEN_AI_CHAT, - name=f"{consts.OP.GEN_AI_CHAT} {model}", + name=f"chat {model}", origin=OpenAIIntegration.origin, ) span.__enter__() @@ -365,7 +365,7 @@ def _new_embeddings_create_common(f, *args, **kwargs): with sentry_sdk.start_span( op=consts.OP.GEN_AI_EMBEDDINGS, - name=f"{consts.OP.GEN_AI_EMBEDDINGS} {model}", + name=f"embeddings {model}", origin=OpenAIIntegration.origin, ) as span: set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) @@ -491,7 +491,7 @@ def _new_responses_create_common(f, *args, **kwargs): span = sentry_sdk.start_span( op=consts.OP.GEN_AI_RESPONSES, - name=f"{consts.OP.GEN_AI_RESPONSES} {model}", + name=f"responses {model}", origin=OpenAIIntegration.origin, ) span.__enter__() From a29716b6b9fa18f7b8cf41df676e7a14a7f7590e Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 23 Jul 2025 10:21:32 +0200 Subject: [PATCH 02/27] Add common otel attributes --- sentry_sdk/integrations/openai.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 993be09a31..6fcf0ba5b7 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -124,13 +124,11 @@ def _calculate_token_usage( total_tokens = _get_usage(response.usage, ["total_tokens"]) # Manually count tokens - # TODO: when implementing responses API, check for responses API if input_tokens == 0: for message in messages: if "content" in message: input_tokens += count_tokens(message["content"]) - # TODO: when implementing responses API, check for responses API if output_tokens == 0: if streaming_message_responses is not None: for message in streaming_message_responses: @@ -191,7 +189,9 @@ def _new_chat_completion_common(f, *args, **kwargs): if should_send_default_pii() and integration.include_prompts: set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages) + set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") set_data_normalized(span, SPANDATA.AI_STREAMING, streaming) if hasattr(res, "choices"): @@ -368,7 +368,9 @@ def _new_embeddings_create_common(f, *args, **kwargs): name=f"embeddings {model}", origin=OpenAIIntegration.origin, ) as span: + set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "embeddings") if "input" in kwargs and ( should_send_default_pii() and integration.include_prompts @@ -496,7 +498,9 @@ def _new_responses_create_common(f, *args, **kwargs): ) span.__enter__() + set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") if should_send_default_pii() and 
integration.include_prompts: set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, input) From 5b11b7fbd2271a533b58b9369446e15ad6a5bb02 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 23 Jul 2025 13:50:32 +0200 Subject: [PATCH 03/27] attributes on chat completion --- sentry_sdk/consts.py | 2 +- sentry_sdk/integrations/openai.py | 45 +++++++++++++---- .../integrations/openai_agents/utils.py | 50 +------------------ sentry_sdk/utils.py | 46 +++++++++++++++++ 4 files changed, 84 insertions(+), 59 deletions(-) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 386aa1040e..4ad4acb64f 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -417,7 +417,7 @@ class SPANDATA: GEN_AI_REQUEST_MODEL = "gen_ai.request.model" """ The model identifier being used for the request. - Example: "gpt-4-turbo-preview" + Example: "gpt-4-turbo" """ GEN_AI_REQUEST_PRESENCE_PENALTY = "gen_ai.request.presence_penalty" diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 6fcf0ba5b7..a344039f38 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -11,6 +11,7 @@ from sentry_sdk.utils import ( capture_internal_exceptions, event_from_exception, + safe_serialize, ) from typing import TYPE_CHECKING @@ -183,24 +184,50 @@ def _new_chat_completion_common(f, *args, **kwargs): ) span.__enter__() + # Common attributes + set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") + set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") + set_data_normalized(span, SPANDATA.AI_STREAMING, streaming) + + # Optional attributes + max_tokens = kwargs.get("max_tokens") + if max_tokens is not None: + set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, max_tokens) + + presence_penalty = kwargs.get("presence_penalty") + if presence_penalty is not None: + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY, presence_penalty + ) + + temperature = kwargs.get("temperature") + if temperature is not None: + set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_TEMPERATURE, temperature) + + top_p = kwargs.get("top_p") + if top_p is not None: + set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_TOP_P, top_p) + res = yield f, args, kwargs with capture_internal_exceptions(): if should_send_default_pii() and integration.include_prompts: set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages) - set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") - set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) - set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") - set_data_normalized(span, SPANDATA.AI_STREAMING, streaming) + if hasattr(res, "model"): + set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, res.model) if hasattr(res, "choices"): if should_send_default_pii() and integration.include_prompts: - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - list(map(lambda x: x.message, res.choices)), - ) + response_text = [choice.message.dict() for choice in res.choices] + if len(response_text) > 0: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + safe_serialize(response_text), + ) + _calculate_token_usage(messages, res, span, None, integration.count_tokens) span.__exit__(None, None, None) elif hasattr(res, "_iterator"): diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py index dc66521c83..1525346726 100644 --- 
a/sentry_sdk/integrations/openai_agents/utils.py
+++ b/sentry_sdk/integrations/openai_agents/utils.py
@@ -1,16 +1,14 @@
-import json
 import sentry_sdk
 from sentry_sdk.consts import SPANDATA
 from sentry_sdk.integrations import DidNotEnable
 from sentry_sdk.scope import should_send_default_pii
-from sentry_sdk.utils import event_from_exception
+from sentry_sdk.utils import event_from_exception, safe_serialize

 from typing import TYPE_CHECKING

 if TYPE_CHECKING:
     from typing import Any
     from typing import Callable
-    from typing import Union

     from agents import Usage

 try:
@@ -162,49 +160,3 @@ def _set_output_data(span, result):
         span.set_data(
             SPANDATA.GEN_AI_RESPONSE_TEXT, safe_serialize(output_messages["response"])
         )
-
-
-def safe_serialize(data):
-    # type: (Any) -> str
-    """Safely serialize to a readable string."""
-
-    def serialize_item(item):
-        # type: (Any) -> Union[str, dict[Any, Any], list[Any], tuple[Any, ...]]
-        if callable(item):
-            try:
-                module = getattr(item, "__module__", None)
-                qualname = getattr(item, "__qualname__", None)
-                name = getattr(item, "__name__", "anonymous")
-
-                if module and qualname:
-                    full_path = f"{module}.{qualname}"
-                elif module and name:
-                    full_path = f"{module}.{name}"
-                else:
-                    full_path = name
-
-                return f"<function {full_path}>"
-            except Exception:
-                return f"<callable {name}>"
-        elif isinstance(item, dict):
-            return {k: serialize_item(v) for k, v in item.items()}
-        elif isinstance(item, (list, tuple)):
-            return [serialize_item(x) for x in item]
-        elif hasattr(item, "__dict__"):
-            try:
-                attrs = {
-                    k: serialize_item(v)
-                    for k, v in vars(item).items()
-                    if not k.startswith("_")
-                }
-                return f"<{type(item).__name__} {attrs}>"
-            except Exception:
-                return repr(item)
-        else:
-            return item
-
-    try:
-        serialized = serialize_item(data)
-        return json.dumps(serialized, default=str)
-    except Exception:
-        return str(data)
diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py
index 3b0ab8d746..9c6f2cfc3b 100644
--- a/sentry_sdk/utils.py
+++ b/sentry_sdk/utils.py
@@ -1938,3 +1938,49 @@ def try_convert(convert_func, value):
         return convert_func(value)
     except Exception:
         return None
+
+
+def safe_serialize(data):
+    # type: (Any) -> str
+    """Safely serialize to a readable string."""
+
+    def serialize_item(item):
+        # type: (Any) -> Union[str, dict[Any, Any], list[Any], tuple[Any, ...]]
+        if callable(item):
+            try:
+                module = getattr(item, "__module__", None)
+                qualname = getattr(item, "__qualname__", None)
+                name = getattr(item, "__name__", "anonymous")
+
+                if module and qualname:
+                    full_path = f"{module}.{qualname}"
+                elif module and name:
+                    full_path = f"{module}.{name}"
+                else:
+                    full_path = name
+
+                return f"<function {full_path}>"
+            except Exception:
+                return f"<callable {name}>"
+        elif isinstance(item, dict):
+            return {k: serialize_item(v) for k, v in item.items()}
+        elif isinstance(item, (list, tuple)):
+            return [serialize_item(x) for x in item]
+        elif hasattr(item, "__dict__"):
+            try:
+                attrs = {
+                    k: serialize_item(v)
+                    for k, v in vars(item).items()
+                    if not k.startswith("_")
+                }
+                return f"<{type(item).__name__} {attrs}>"
+            except Exception:
+                return repr(item)
+        else:
+            return item
+
+    try:
+        serialized = serialize_item(data)
+        return json.dumps(serialized, default=str)
+    except Exception:
+        return str(data)

From 2f04a2308e737515ba63f4b7401380d34b3604b2 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Wed, 23 Jul 2025 14:05:26 +0200
Subject: [PATCH 04/27] organize code

---
 sentry_sdk/integrations/openai.py | 243 ++++++++++++++++--------------
 1 file changed, 126 insertions(+), 117 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py
b/sentry_sdk/integrations/openai.py index a344039f38..07adecbb45 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -156,35 +156,19 @@ def _calculate_token_usage( ) -def _new_chat_completion_common(f, *args, **kwargs): - # type: (Any, Any, Any) -> Any - integration = sentry_sdk.get_client().get_integration(OpenAIIntegration) - if integration is None: - return f(*args, **kwargs) +def _set_request_data(span, kwargs, integration): + # type: (Span, dict[str, Any], Integration) -> None + messages = kwargs.get("messages") + if ( + messages is not None + and should_send_default_pii() + and integration.include_prompts + ): + set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages) - if "messages" not in kwargs: - # invalid call (in all versions of openai), let it return error - return f(*args, **kwargs) - - try: - iter(kwargs["messages"]) - except TypeError: - # invalid call (in all versions), messages must be iterable - return f(*args, **kwargs) - - kwargs["messages"] = list(kwargs["messages"]) - messages = kwargs["messages"] + # Common attributes model = kwargs.get("model") streaming = kwargs.get("stream") - - span = sentry_sdk.start_span( - op=consts.OP.GEN_AI_CHAT, - name=f"chat {model}", - origin=OpenAIIntegration.origin, - ) - span.__enter__() - - # Common attributes set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") @@ -209,106 +193,131 @@ def _new_chat_completion_common(f, *args, **kwargs): if top_p is not None: set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_TOP_P, top_p) - res = yield f, args, kwargs - with capture_internal_exceptions(): - if should_send_default_pii() and integration.include_prompts: - set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages) +def _set_response_data(span, res, kwargs, integration): + # type: (Span, Any, dict[str, Any], Integration) -> None + if hasattr(res, "model"): + set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, res.model) + + messages = kwargs.get("messages", []) - if hasattr(res, "model"): - set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, res.model) + if hasattr(res, "choices"): + if should_send_default_pii() and integration.include_prompts: + response_text = [choice.message.dict() for choice in res.choices] + if len(response_text) > 0: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + safe_serialize(response_text), + ) - if hasattr(res, "choices"): - if should_send_default_pii() and integration.include_prompts: - response_text = [choice.message.dict() for choice in res.choices] - if len(response_text) > 0: - set_data_normalized( + _calculate_token_usage(messages, res, span, None, integration.count_tokens) + + elif hasattr(res, "_iterator"): + data_buf: list[list[str]] = [] # one for each choice + + old_iterator = res._iterator + + def new_iterator(): + # type: () -> Iterator[ChatCompletionChunk] + with capture_internal_exceptions(): + for x in old_iterator: + if hasattr(x, "choices"): + choice_index = 0 + for choice in x.choices: + if hasattr(choice, "delta") and hasattr( + choice.delta, "content" + ): + content = choice.delta.content + if len(data_buf) <= choice_index: + data_buf.append([]) + data_buf[choice_index].append(content or "") + choice_index += 1 + yield x + if len(data_buf) > 0: + all_responses = list(map(lambda chunk: "".join(chunk), data_buf)) + if should_send_default_pii() and 
integration.include_prompts: + set_data_normalized( + span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses + ) + _calculate_token_usage( + messages, + res, span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - safe_serialize(response_text), + all_responses, + integration.count_tokens, ) - - _calculate_token_usage(messages, res, span, None, integration.count_tokens) span.__exit__(None, None, None) - elif hasattr(res, "_iterator"): - data_buf: list[list[str]] = [] # one for each choice - - old_iterator = res._iterator - - def new_iterator(): - # type: () -> Iterator[ChatCompletionChunk] - with capture_internal_exceptions(): - for x in old_iterator: - if hasattr(x, "choices"): - choice_index = 0 - for choice in x.choices: - if hasattr(choice, "delta") and hasattr( - choice.delta, "content" - ): - content = choice.delta.content - if len(data_buf) <= choice_index: - data_buf.append([]) - data_buf[choice_index].append(content or "") - choice_index += 1 - yield x - if len(data_buf) > 0: - all_responses = list( - map(lambda chunk: "".join(chunk), data_buf) - ) - if should_send_default_pii() and integration.include_prompts: - set_data_normalized( - span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses - ) - _calculate_token_usage( - messages, - res, - span, - all_responses, - integration.count_tokens, - ) - span.__exit__(None, None, None) - - async def new_iterator_async(): - # type: () -> AsyncIterator[ChatCompletionChunk] - with capture_internal_exceptions(): - async for x in old_iterator: - if hasattr(x, "choices"): - choice_index = 0 - for choice in x.choices: - if hasattr(choice, "delta") and hasattr( - choice.delta, "content" - ): - content = choice.delta.content - if len(data_buf) <= choice_index: - data_buf.append([]) - data_buf[choice_index].append(content or "") - choice_index += 1 - yield x - if len(data_buf) > 0: - all_responses = list( - map(lambda chunk: "".join(chunk), data_buf) - ) - if should_send_default_pii() and integration.include_prompts: - set_data_normalized( - span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses - ) - _calculate_token_usage( - messages, - res, - span, - all_responses, - integration.count_tokens, - ) - span.__exit__(None, None, None) - if str(type(res._iterator)) == "": - res._iterator = new_iterator_async() - else: - res._iterator = new_iterator() + async def new_iterator_async(): + # type: () -> AsyncIterator[ChatCompletionChunk] + with capture_internal_exceptions(): + async for x in old_iterator: + if hasattr(x, "choices"): + choice_index = 0 + for choice in x.choices: + if hasattr(choice, "delta") and hasattr( + choice.delta, "content" + ): + content = choice.delta.content + if len(data_buf) <= choice_index: + data_buf.append([]) + data_buf[choice_index].append(content or "") + choice_index += 1 + yield x + if len(data_buf) > 0: + all_responses = list(map(lambda chunk: "".join(chunk), data_buf)) + if should_send_default_pii() and integration.include_prompts: + set_data_normalized( + span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses + ) + _calculate_token_usage( + messages, + res, + span, + all_responses, + integration.count_tokens, + ) + span.__exit__(None, None, None) + if str(type(res._iterator)) == "": + res._iterator = new_iterator_async() else: - set_data_normalized(span, "unknown_response", True) - span.__exit__(None, None, None) + res._iterator = new_iterator() + + else: + set_data_normalized(span, "unknown_response", True) + + +def _new_chat_completion_common(f, *args, **kwargs): + # type: (Any, Any, Any) -> Any + integration = 
sentry_sdk.get_client().get_integration(OpenAIIntegration) + if integration is None: + return f(*args, **kwargs) + + if "messages" not in kwargs: + # invalid call (in all versions of openai), let it return error + return f(*args, **kwargs) + + try: + iter(kwargs["messages"]) + except TypeError: + # invalid call (in all versions), messages must be iterable + return f(*args, **kwargs) + + model = kwargs.get("model") + + with sentry_sdk.start_span( + op=consts.OP.GEN_AI_CHAT, + name=f"chat {model}", + origin=OpenAIIntegration.origin, + ) as span: + _set_request_data(span, kwargs, integration) + + res = yield f, args, kwargs + + _set_response_data(span, res, kwargs, integration) + return res From 6b2f5de42df9b720a8eac3e14c1b72f59a3e8318 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 23 Jul 2025 16:06:58 +0200 Subject: [PATCH 05/27] refactor embeddings --- sentry_sdk/integrations/openai.py | 104 +++++++++++------------------- 1 file changed, 39 insertions(+), 65 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 07adecbb45..af1795e3b8 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -156,11 +156,18 @@ def _calculate_token_usage( ) -def _set_request_data(span, kwargs, integration): - # type: (Span, dict[str, Any], Integration) -> None +def _set_request_data(span, kwargs, operation, integration): + # type: (Span, dict[str, Any], str, Integration) -> None messages = kwargs.get("messages") + if messages is None: + messages = kwargs.get("input") + + if isinstance(messages, str): + messages = [messages] + if ( messages is not None + and len(messages) > 0 and should_send_default_pii() and integration.include_prompts ): @@ -171,7 +178,7 @@ def _set_request_data(span, kwargs, integration): streaming = kwargs.get("stream") set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) - set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation) set_data_normalized(span, SPANDATA.AI_STREAMING, streaming) # Optional attributes @@ -194,16 +201,21 @@ def _set_request_data(span, kwargs, integration): set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_TOP_P, top_p) -def _set_response_data(span, res, kwargs, integration): +def _set_response_data(span, response, kwargs, integration): # type: (Span, Any, dict[str, Any], Integration) -> None - if hasattr(res, "model"): - set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, res.model) + if hasattr(response, "model"): + set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, response.model) + + messages = kwargs.get("messages") + if messages is None: + messages = kwargs.get("input") - messages = kwargs.get("messages", []) + if isinstance(messages, str): + messages = [messages] - if hasattr(res, "choices"): + if hasattr(response, "choices"): if should_send_default_pii() and integration.include_prompts: - response_text = [choice.message.dict() for choice in res.choices] + response_text = [choice.message.dict() for choice in response.choices] if len(response_text) > 0: set_data_normalized( span, @@ -211,12 +223,10 @@ def _set_response_data(span, res, kwargs, integration): safe_serialize(response_text), ) - _calculate_token_usage(messages, res, span, None, integration.count_tokens) - - elif hasattr(res, "_iterator"): + elif hasattr(response, "_iterator"): data_buf: list[list[str]] = [] # one for each choice - old_iterator = 
res._iterator + old_iterator = response._iterator def new_iterator(): # type: () -> Iterator[ChatCompletionChunk] @@ -242,7 +252,7 @@ def new_iterator(): ) _calculate_token_usage( messages, - res, + response, span, all_responses, integration.count_tokens, @@ -273,20 +283,19 @@ async def new_iterator_async(): ) _calculate_token_usage( messages, - res, + response, span, all_responses, integration.count_tokens, ) span.__exit__(None, None, None) - if str(type(res._iterator)) == "": - res._iterator = new_iterator_async() + if str(type(response._iterator)) == "": + response._iterator = new_iterator_async() else: - res._iterator = new_iterator() + response._iterator = new_iterator() - else: - set_data_normalized(span, "unknown_response", True) + _calculate_token_usage(messages, response, span, None, integration.count_tokens) def _new_chat_completion_common(f, *args, **kwargs): @@ -306,19 +315,20 @@ def _new_chat_completion_common(f, *args, **kwargs): return f(*args, **kwargs) model = kwargs.get("model") + operation = "chat" with sentry_sdk.start_span( op=consts.OP.GEN_AI_CHAT, - name=f"chat {model}", + name=f"{operation} {model}", origin=OpenAIIntegration.origin, ) as span: - _set_request_data(span, kwargs, integration) + _set_request_data(span, kwargs, operation, integration) - res = yield f, args, kwargs + response = yield f, args, kwargs - _set_response_data(span, res, kwargs, integration) + _set_response_data(span, response, kwargs, integration) - return res + return response def _wrap_chat_completion_create(f): @@ -398,54 +408,18 @@ def _new_embeddings_create_common(f, *args, **kwargs): return f(*args, **kwargs) model = kwargs.get("model") + operation = "embeddings" with sentry_sdk.start_span( op=consts.OP.GEN_AI_EMBEDDINGS, - name=f"embeddings {model}", + name=f"{operation} {model}", origin=OpenAIIntegration.origin, ) as span: - set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") - set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) - set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "embeddings") - - if "input" in kwargs and ( - should_send_default_pii() and integration.include_prompts - ): - if isinstance(kwargs["input"], str): - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_MESSAGES, [kwargs["input"]] - ) - elif ( - isinstance(kwargs["input"], list) - and len(kwargs["input"]) > 0 - and isinstance(kwargs["input"][0], str) - ): - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_MESSAGES, kwargs["input"] - ) + _set_request_data(span, kwargs, operation, integration) response = yield f, args, kwargs - input_tokens = 0 - total_tokens = 0 - if hasattr(response, "usage"): - if hasattr(response.usage, "prompt_tokens") and isinstance( - response.usage.prompt_tokens, int - ): - input_tokens = response.usage.prompt_tokens - if hasattr(response.usage, "total_tokens") and isinstance( - response.usage.total_tokens, int - ): - total_tokens = response.usage.total_tokens - - if input_tokens == 0: - input_tokens = integration.count_tokens(kwargs["input"] or "") - - record_token_usage( - span, - input_tokens=input_tokens, - total_tokens=total_tokens or input_tokens, - ) + _set_response_data(span, response, kwargs, integration) return response From ac4f406973d0806865123841ce147d7b3979d4c3 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 23 Jul 2025 16:35:17 +0200 Subject: [PATCH 06/27] cleanup respones --- sentry_sdk/integrations/openai.py | 46 ++++++++++++------------------- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git 
a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index af1795e3b8..9c7eea2af6 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -1,5 +1,4 @@ from functools import wraps -import json import sentry_sdk from sentry_sdk import consts @@ -223,6 +222,16 @@ def _set_response_data(span, response, kwargs, integration): safe_serialize(response_text), ) + elif hasattr(response, "output"): + if should_send_default_pii() and integration.include_prompts: + response_text = [item.to_dict() for item in response.output] + if len(response_text) > 0: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + safe_serialize(response_text), + ) + elif hasattr(response, "_iterator"): data_buf: list[list[str]] = [] # one for each choice @@ -499,39 +508,20 @@ def _new_responses_create_common(f, *args, **kwargs): return f(*args, **kwargs) model = kwargs.get("model") - input = kwargs.get("input") + operation = "responses" - span = sentry_sdk.start_span( + with sentry_sdk.start_span( op=consts.OP.GEN_AI_RESPONSES, - name=f"responses {model}", + name=f"{operation} {model}", origin=OpenAIIntegration.origin, - ) - span.__enter__() - - set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") - set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) - set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") - - if should_send_default_pii() and integration.include_prompts: - set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, input) - - res = yield f, args, kwargs - - if hasattr(res, "output"): - if should_send_default_pii() and integration.include_prompts: - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - json.dumps([item.to_dict() for item in res.output]), - ) - _calculate_token_usage([], res, span, None, integration.count_tokens) + ) as span: + _set_request_data(span, kwargs, operation, integration) - else: - set_data_normalized(span, "unknown_response", True) + response = yield f, args, kwargs - span.__exit__(None, None, None) + _set_response_data(span, response, kwargs, integration) - return res + return response def _wrap_responses_create(f): From b54b8dc658a6819b75f59d2192cd78de7ee9001d Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 23 Jul 2025 16:49:33 +0200 Subject: [PATCH 07/27] Add available tools --- sentry_sdk/integrations/openai.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 9c7eea2af6..17ba1831a1 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -191,6 +191,12 @@ def _set_request_data(span, kwargs, operation, integration): span, SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY, presence_penalty ) + frequency_penalty = kwargs.get("frequency_penalty") + if frequency_penalty is not None: + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY, frequency_penalty + ) + temperature = kwargs.get("temperature") if temperature is not None: set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_TEMPERATURE, temperature) @@ -199,6 +205,13 @@ def _set_request_data(span, kwargs, operation, integration): if top_p is not None: set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_TOP_P, top_p) + # Tools + tools = kwargs.get("tools", []) + if tools is not None and len(tools) > 0: + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, safe_serialize(tools) + ) + def _set_response_data(span, response, kwargs, integration): # type: (Span, Any, 
dict[str, Any], Integration) -> None From 1bbb77215c7aa9ee90e83fe28f33ed7c2e5f208b Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 23 Jul 2025 17:12:08 +0200 Subject: [PATCH 08/27] updated some tests --- sentry_sdk/integrations/openai.py | 6 ++++-- tests/integrations/openai/test_openai.py | 18 ++++++++++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 17ba1831a1..e5fc412ef2 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -174,13 +174,15 @@ def _set_request_data(span, kwargs, operation, integration): # Common attributes model = kwargs.get("model") - streaming = kwargs.get("stream") set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation) - set_data_normalized(span, SPANDATA.AI_STREAMING, streaming) # Optional attributes + streaming = kwargs.get("stream") + if streaming is not None: + set_data_normalized(span, SPANDATA.AI_STREAMING, streaming) + max_tokens = kwargs.get("max_tokens") if max_tokens is not None: set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, max_tokens) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index f6b18e6908..4291626319 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1,3 +1,4 @@ +import json import pytest from openai import AsyncOpenAI, OpenAI, AsyncStream, Stream, OpenAIError from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding @@ -52,7 +53,7 @@ async def __call__(self, *args, **kwargs): ) ], created=10000000, - model="model-id", + model="response-model-id", object="chat.completion", usage=CompletionUsage( completion_tokens=10, @@ -86,7 +87,7 @@ async def __call__(self, *args, **kwargs): tool_choice="none", tools=[], created_at=10000000, - model="model-id", + model="response-model-id", object="response", usage=ResponseUsage( input_tokens=20, @@ -143,7 +144,7 @@ def test_nonstreaming_chat_completion( assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]["content"] assert ( "the model response" - in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]["content"] + in json.loads(span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT])[0]["content"] ) else: assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] @@ -188,7 +189,7 @@ async def test_nonstreaming_chat_completion_async( assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]["content"] assert ( "the model response" - in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]["content"] + in json.loads(span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT])[0]["content"] ) else: assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] @@ -986,7 +987,10 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): assert spans[0]["op"] == "gen_ai.responses" assert spans[0]["origin"] == "auto.ai.openai" assert spans[0]["data"] == { + "gen_ai.operation.name": "responses", "gen_ai.request.model": "gpt-4o", + "gen_ai.response.model": "response-model-id", + "gen_ai.system": "openai", "gen_ai.usage.input_tokens": 20, "gen_ai.usage.input_tokens.cached": 5, "gen_ai.usage.output_tokens": 10, @@ -1026,8 +1030,11 @@ def test_ai_client_span_responses_api(sentry_init, capture_events): assert spans[0]["op"] == "gen_ai.responses" assert spans[0]["origin"] == "auto.ai.openai" assert spans[0]["data"] == { + 
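# With PII enabled, the expected attributes below include the request messages
# (and, further down, the response text) in addition to the common gen_ai.*
# metadata and the token usage taken from the mocked ResponseUsage: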
"gen_ai.operation.name": "responses", "gen_ai.request.messages": "How do I check if a Python object is an instance of a class?", "gen_ai.request.model": "gpt-4o", + "gen_ai.system": "openai", + "gen_ai.response.model": "response-model-id", "gen_ai.usage.input_tokens": 20, "gen_ai.usage.input_tokens.cached": 5, "gen_ai.usage.output_tokens": 10, @@ -1103,8 +1110,11 @@ async def test_ai_client_span_responses_async_api(sentry_init, capture_events): assert spans[0]["op"] == "gen_ai.responses" assert spans[0]["origin"] == "auto.ai.openai" assert spans[0]["data"] == { + "gen_ai.operation.name": "responses", "gen_ai.request.messages": "How do I check if a Python object is an instance of a class?", "gen_ai.request.model": "gpt-4o", + "gen_ai.response.model": "response-model-id", + "gen_ai.system": "openai", "gen_ai.usage.input_tokens": 20, "gen_ai.usage.input_tokens.cached": 5, "gen_ai.usage.output_tokens": 10, From c80a4137d5c11db320d189fc8f4508b3bc7cf33f Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 24 Jul 2025 09:18:37 +0200 Subject: [PATCH 09/27] fixed tests --- sentry_sdk/integrations/openai.py | 40 +++++++++++++------- tests/integrations/openai/test_openai.py | 47 ++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 14 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index e5fc412ef2..812a41e17a 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -155,6 +155,7 @@ def _calculate_token_usage( ) +# TODO: rename to _set_input_data and _set_output_data def _set_request_data(span, kwargs, operation, integration): # type: (Span, dict[str, Any], str, Integration) -> None messages = kwargs.get("messages") @@ -172,6 +173,7 @@ def _set_request_data(span, kwargs, operation, integration): ): set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages) + # TODO: make mapping and loop over kwargs to set attributes # Common attributes model = kwargs.get("model") set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") @@ -215,8 +217,8 @@ def _set_request_data(span, kwargs, operation, integration): ) -def _set_response_data(span, response, kwargs, integration): - # type: (Span, Any, dict[str, Any], Integration) -> None +def _set_response_data(span, response, kwargs, integration, finish_span=True): + # type: (Span, Any, dict[str, Any], Integration, bool) -> None if hasattr(response, "model"): set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, response.model) @@ -236,6 +238,7 @@ def _set_response_data(span, response, kwargs, integration): SPANDATA.GEN_AI_RESPONSE_TEXT, safe_serialize(response_text), ) + span.__exit__(None, None, None) elif hasattr(response, "output"): if should_send_default_pii() and integration.include_prompts: @@ -318,6 +321,10 @@ async def new_iterator_async(): response._iterator = new_iterator_async() else: response._iterator = new_iterator() + else: + set_data_normalized(span, "unknown_response", True) + if finish_span: + span.__exit__(None, None, None) _calculate_token_usage(messages, response, span, None, integration.count_tokens) @@ -341,16 +348,18 @@ def _new_chat_completion_common(f, *args, **kwargs): model = kwargs.get("model") operation = "chat" - with sentry_sdk.start_span( + span = sentry_sdk.start_span( op=consts.OP.GEN_AI_CHAT, name=f"{operation} {model}", origin=OpenAIIntegration.origin, - ) as span: - _set_request_data(span, kwargs, operation, integration) + ) + span.__enter__() - response = yield f, args, kwargs + _set_request_data(span, kwargs, operation, 
integration) - _set_response_data(span, response, kwargs, integration) + response = yield f, args, kwargs + + _set_response_data(span, response, kwargs, integration) return response @@ -443,7 +452,7 @@ def _new_embeddings_create_common(f, *args, **kwargs): response = yield f, args, kwargs - _set_response_data(span, response, kwargs, integration) + _set_response_data(span, response, kwargs, integration, finish_span=False) return response @@ -525,18 +534,21 @@ def _new_responses_create_common(f, *args, **kwargs): model = kwargs.get("model") operation = "responses" - with sentry_sdk.start_span( + span = sentry_sdk.start_span( op=consts.OP.GEN_AI_RESPONSES, name=f"{operation} {model}", origin=OpenAIIntegration.origin, - ) as span: - _set_request_data(span, kwargs, operation, integration) + ) + span.__enter__() + _set_request_data(span, kwargs, operation, integration) - response = yield f, args, kwargs + response = yield f, args, kwargs - _set_response_data(span, response, kwargs, integration) + _set_response_data(span, response, kwargs, integration) - return response + span.__exit__(None, None, None) + + return response def _wrap_responses_create(f): diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 4291626319..a08e319bc1 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1126,6 +1126,53 @@ async def test_ai_client_span_responses_async_api(sentry_init, capture_events): } +@pytest.mark.asyncio +@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") +async def test_ai_client_span_streaming_responses_async_api( + sentry_init, capture_events +): + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = AsyncOpenAI(api_key="z") + client.responses._post = AsyncMock(return_value=EXAMPLE_RESPONSE) + + with start_transaction(name="openai tx"): + await client.responses.create( + model="gpt-4o", + instructions="You are a coding assistant that talks like a pirate.", + input="How do I check if a Python object is an instance of a class?", + stream=True, + ) + + (transaction,) = events + spans = transaction["spans"] + + assert len(spans) == 1 + assert spans[0]["op"] == "gen_ai.responses" + assert spans[0]["origin"] == "auto.ai.openai" + assert spans[0]["data"] == { + "ai.streaming": True, + "gen_ai.operation.name": "responses", + "gen_ai.request.messages": "How do I check if a Python object is an instance of a class?", + "gen_ai.request.model": "gpt-4o", + "gen_ai.response.model": "response-model-id", + "gen_ai.system": "openai", + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "gen_ai.response.text": '[{"id": "message-id", "content": [{"annotations": [], "text": "the model response", "type": "output_text"}], "role": "assistant", "status": "completed", "type": "message"}]', + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + @pytest.mark.asyncio @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_error_in_responses_async_api(sentry_init, capture_events): From 3ad5e509d355658288c6349ba172168efd6be03f Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 24 Jul 2025 09:29:11 +0200 Subject: [PATCH 10/27] cleanup --- sentry_sdk/integrations/openai.py | 83 
+++++++++++++------------------ 1 file changed, 35 insertions(+), 48 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 812a41e17a..ed22dd51a0 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -156,8 +156,9 @@ def _calculate_token_usage( # TODO: rename to _set_input_data and _set_output_data -def _set_request_data(span, kwargs, operation, integration): +def _set_input_data(span, kwargs, operation, integration): # type: (Span, dict[str, Any], str, Integration) -> None + # Input messages (the prompt or data sent to the model) messages = kwargs.get("messages") if messages is None: messages = kwargs.get("input") @@ -173,43 +174,26 @@ def _set_request_data(span, kwargs, operation, integration): ): set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages) - # TODO: make mapping and loop over kwargs to set attributes - # Common attributes - model = kwargs.get("model") + # Input attributes: Common set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") - set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation) - # Optional attributes - streaming = kwargs.get("stream") - if streaming is not None: - set_data_normalized(span, SPANDATA.AI_STREAMING, streaming) - - max_tokens = kwargs.get("max_tokens") - if max_tokens is not None: - set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, max_tokens) - - presence_penalty = kwargs.get("presence_penalty") - if presence_penalty is not None: - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY, presence_penalty - ) - - frequency_penalty = kwargs.get("frequency_penalty") - if frequency_penalty is not None: - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY, frequency_penalty - ) - - temperature = kwargs.get("temperature") - if temperature is not None: - set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_TEMPERATURE, temperature) - - top_p = kwargs.get("top_p") - if top_p is not None: - set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_TOP_P, top_p) - - # Tools + # Input attributes: Optional + kwargs_keys_to_attributes = { + "model": SPANDATA.GEN_AI_REQUEST_MODEL, + "stream": SPANDATA.AI_STREAMING, + "max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, + "presence_penalty": SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY, + "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY, + "temperature": SPANDATA.GEN_AI_REQUEST_TEMPERATURE, + "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P, + } + for key, attribute in kwargs_keys_to_attributes.items(): + value = kwargs.get(key) + if value is not None: + set_data_normalized(span, attribute, value) + + # Input attributes: Tools tools = kwargs.get("tools", []) if tools is not None and len(tools) > 0: set_data_normalized( @@ -217,7 +201,7 @@ def _set_request_data(span, kwargs, operation, integration): ) -def _set_response_data(span, response, kwargs, integration, finish_span=True): +def _set_output_data(span, response, kwargs, integration, finish_span=True): # type: (Span, Any, dict[str, Any], Integration, bool) -> None if hasattr(response, "model"): set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, response.model) @@ -238,7 +222,8 @@ def _set_response_data(span, response, kwargs, integration, finish_span=True): SPANDATA.GEN_AI_RESPONSE_TEXT, safe_serialize(response_text), ) - span.__exit__(None, None, None) + if finish_span: + span.__exit__(None, None, None) elif hasattr(response, "output"): if 
should_send_default_pii() and integration.include_prompts: @@ -249,6 +234,8 @@ def _set_response_data(span, response, kwargs, integration, finish_span=True): SPANDATA.GEN_AI_RESPONSE_TEXT, safe_serialize(response_text), ) + if finish_span: + span.__exit__(None, None, None) elif hasattr(response, "_iterator"): data_buf: list[list[str]] = [] # one for each choice @@ -284,7 +271,8 @@ def new_iterator(): all_responses, integration.count_tokens, ) - span.__exit__(None, None, None) + if finish_span: + span.__exit__(None, None, None) async def new_iterator_async(): # type: () -> AsyncIterator[ChatCompletionChunk] @@ -315,14 +303,14 @@ async def new_iterator_async(): all_responses, integration.count_tokens, ) - span.__exit__(None, None, None) + if finish_span: + span.__exit__(None, None, None) if str(type(response._iterator)) == "": response._iterator = new_iterator_async() else: response._iterator = new_iterator() else: - set_data_normalized(span, "unknown_response", True) if finish_span: span.__exit__(None, None, None) @@ -355,11 +343,11 @@ def _new_chat_completion_common(f, *args, **kwargs): ) span.__enter__() - _set_request_data(span, kwargs, operation, integration) + _set_input_data(span, kwargs, operation, integration) response = yield f, args, kwargs - _set_response_data(span, response, kwargs, integration) + _set_output_data(span, response, kwargs, integration, finish_span=True) return response @@ -448,11 +436,11 @@ def _new_embeddings_create_common(f, *args, **kwargs): name=f"{operation} {model}", origin=OpenAIIntegration.origin, ) as span: - _set_request_data(span, kwargs, operation, integration) + _set_input_data(span, kwargs, operation, integration) response = yield f, args, kwargs - _set_response_data(span, response, kwargs, integration, finish_span=False) + _set_output_data(span, response, kwargs, integration, finish_span=False) return response @@ -540,13 +528,12 @@ def _new_responses_create_common(f, *args, **kwargs): origin=OpenAIIntegration.origin, ) span.__enter__() - _set_request_data(span, kwargs, operation, integration) - response = yield f, args, kwargs + _set_input_data(span, kwargs, operation, integration) - _set_response_data(span, response, kwargs, integration) + response = yield f, args, kwargs - span.__exit__(None, None, None) + _set_output_data(span, response, kwargs, integration, finish_span=True) return response From dddaab2c974fba62d1543780d65855f6f27bc6ca Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 24 Jul 2025 10:21:07 +0200 Subject: [PATCH 11/27] fix token count for streaming responses api --- sentry_sdk/integrations/openai.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index ed22dd51a0..319790c11f 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -126,8 +126,10 @@ def _calculate_token_usage( # Manually count tokens if input_tokens == 0: for message in messages: - if "content" in message: + if isinstance(message, dict) and "content" in message: input_tokens += count_tokens(message["content"]) + elif isinstance(message, str): + input_tokens += count_tokens(message) if output_tokens == 0: if streaming_message_responses is not None: @@ -246,6 +248,7 @@ def new_iterator(): # type: () -> Iterator[ChatCompletionChunk] with capture_internal_exceptions(): for x in old_iterator: + # OpenAI chat completion API if hasattr(x, "choices"): choice_index = 0 for choice in x.choices: @@ -257,6 +260,11 @@ def new_iterator(): 
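# one chunk list per choice: grow data_buf lazily as new choice indices
# appear in the stream, so each list can later be joined into that
# choice's full response text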
data_buf.append([]) data_buf[choice_index].append(content or "") choice_index += 1 + # OpenAI responses API + elif hasattr(x, "delta"): + if len(data_buf) == 0: + data_buf.append([]) + data_buf[0].append(x.delta or "") yield x if len(data_buf) > 0: all_responses = list(map(lambda chunk: "".join(chunk), data_buf)) From a83fca9c666d1198433fca0110d3736656c40c73 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 24 Jul 2025 11:06:49 +0200 Subject: [PATCH 12/27] fixed streaming responses token count --- sentry_sdk/integrations/openai.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 319790c11f..c55a095aaa 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -286,6 +286,7 @@ async def new_iterator_async(): # type: () -> AsyncIterator[ChatCompletionChunk] with capture_internal_exceptions(): async for x in old_iterator: + # OpenAI chat completion API if hasattr(x, "choices"): choice_index = 0 for choice in x.choices: @@ -297,6 +298,11 @@ async def new_iterator_async(): data_buf.append([]) data_buf[choice_index].append(content or "") choice_index += 1 + # OpenAI responses API + elif hasattr(x, "delta"): + if len(data_buf) == 0: + data_buf.append([]) + data_buf[0].append(x.delta or "") yield x if len(data_buf) > 0: all_responses = list(map(lambda chunk: "".join(chunk), data_buf)) From c589030da801cabf4ce9244ff9aa768202399890 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Fri, 25 Jul 2025 09:30:16 +0200 Subject: [PATCH 13/27] typing --- sentry_sdk/integrations/openai.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index c55a095aaa..c1efd9e485 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -157,9 +157,8 @@ def _calculate_token_usage( ) -# TODO: rename to _set_input_data and _set_output_data def _set_input_data(span, kwargs, operation, integration): - # type: (Span, dict[str, Any], str, Integration) -> None + # type: (Span, dict[str, Any], str, OpenAIIntegration) -> None # Input messages (the prompt or data sent to the model) messages = kwargs.get("messages") if messages is None: @@ -204,7 +203,7 @@ def _set_input_data(span, kwargs, operation, integration): def _set_output_data(span, response, kwargs, integration, finish_span=True): - # type: (Span, Any, dict[str, Any], Integration, bool) -> None + # type: (Span, Any, dict[str, Any], OpenAIIntegration, bool) -> None if hasattr(response, "model"): set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, response.model) From 45300bff3f72df292d5f91dc98c3f9f6e78fd2cf Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Fri, 25 Jul 2025 09:43:16 +0200 Subject: [PATCH 14/27] typing --- sentry_sdk/integrations/openai.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index c1efd9e485..293b2ea864 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -160,8 +160,8 @@ def _calculate_token_usage( def _set_input_data(span, kwargs, operation, integration): # type: (Span, dict[str, Any], str, OpenAIIntegration) -> None # Input messages (the prompt or data sent to the model) - messages = kwargs.get("messages") - if messages is None: + messages = kwargs.get("messages", []) + if messages == []: messages = kwargs.get("input") if isinstance(messages, str): @@ -207,8 +207,10 @@ 
def _set_output_data(span, response, kwargs, integration, finish_span=True): if hasattr(response, "model"): set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, response.model) - messages = kwargs.get("messages") - if messages is None: + # Input messages (the prompt or data sent to the model) + # used for the token usage calculation + messages = kwargs.get("messages", []) + if messages == []: messages = kwargs.get("input") if isinstance(messages, str): From b5dd11578b7b9af1c15fa0c5e71316433e08ce03 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Fri, 25 Jul 2025 14:53:08 +0200 Subject: [PATCH 15/27] More tests --- sentry_sdk/integrations/openai.py | 61 ++++++-- tests/integrations/openai/test_openai.py | 180 +++++++++++++++++++++++ 2 files changed, 227 insertions(+), 14 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 293b2ea864..f234f67a6c 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -248,6 +248,7 @@ def _set_output_data(span, response, kwargs, integration, finish_span=True): def new_iterator(): # type: () -> Iterator[ChatCompletionChunk] with capture_internal_exceptions(): + count_tokens_manually = True for x in old_iterator: # OpenAI chat completion API if hasattr(x, "choices"): @@ -261,31 +262,48 @@ def new_iterator(): data_buf.append([]) data_buf[choice_index].append(content or "") choice_index += 1 + # OpenAI responses API elif hasattr(x, "delta"): if len(data_buf) == 0: data_buf.append([]) data_buf[0].append(x.delta or "") + + # OpenAI responses API end of streaming response + if x.__class__.__name__ == "ResponseCompletedEvent": + _calculate_token_usage( + messages, + x.response, + span, + None, + integration.count_tokens, + ) + count_tokens_manually = False + yield x + if len(data_buf) > 0: all_responses = list(map(lambda chunk: "".join(chunk), data_buf)) if should_send_default_pii() and integration.include_prompts: set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses ) - _calculate_token_usage( - messages, - response, - span, - all_responses, - integration.count_tokens, - ) + if count_tokens_manually: + _calculate_token_usage( + messages, + response, + span, + all_responses, + integration.count_tokens, + ) + if finish_span: span.__exit__(None, None, None) async def new_iterator_async(): # type: () -> AsyncIterator[ChatCompletionChunk] with capture_internal_exceptions(): + count_tokens_manually = True async for x in old_iterator: # OpenAI chat completion API if hasattr(x, "choices"): @@ -299,25 +317,40 @@ async def new_iterator_async(): data_buf.append([]) data_buf[choice_index].append(content or "") choice_index += 1 + # OpenAI responses API elif hasattr(x, "delta"): if len(data_buf) == 0: data_buf.append([]) data_buf[0].append(x.delta or "") + + # OpenAI responses API end of streaming response + if x.__class__.__name__ == "ResponseCompletedEvent": + _calculate_token_usage( + messages, + x.response, + span, + None, + integration.count_tokens, + ) + count_tokens_manually = False + yield x + if len(data_buf) > 0: all_responses = list(map(lambda chunk: "".join(chunk), data_buf)) if should_send_default_pii() and integration.include_prompts: set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses ) - _calculate_token_usage( - messages, - response, - span, - all_responses, - integration.count_tokens, - ) + if count_tokens_manually: + _calculate_token_usage( + messages, + response, + span, + all_responses, + integration.count_tokens, + ) if 
finish_span: span.__exit__(None, None, None) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index a08e319bc1..45d60d22d8 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -6,6 +6,9 @@ from openai.types.chat.chat_completion import Choice from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage +from openai.types.responses.response_text_delta_event import ResponseTextDeltaEvent +from openai.types.responses.response_created_event import ResponseCreatedEvent +from openai.types.responses.response_completed_event import ResponseCompletedEvent SKIP_RESPONSES_TESTS = False @@ -1209,3 +1212,180 @@ async def test_error_in_responses_async_api(sentry_init, capture_events): error_event["contexts"]["trace"]["trace_id"] == transaction_event["contexts"]["trace"]["trace_id"] ) + + +EXAMPLE_RESPONSES_STREAM = [ + ResponseCreatedEvent( + sequence_number=1, + type="response.created", + response=Response( + id="chat-id", + created_at=10000000, + model="response-model-id", + object="response", + output=[], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + ), + ), + ResponseTextDeltaEvent( + item_id="msg_1", + sequence_number=2, + type="response.output_text.delta", + logprobs=[], + content_index=0, + output_index=0, + delta="hel", + ), + ResponseTextDeltaEvent( + item_id="msg_1", + sequence_number=3, + type="response.output_text.delta", + logprobs=[], + content_index=0, + output_index=0, + delta="lo ", + ), + ResponseTextDeltaEvent( + item_id="msg_1", + sequence_number=4, + type="response.output_text.delta", + logprobs=[], + content_index=0, + output_index=0, + delta="world", + ), + ResponseCompletedEvent( + sequence_number=5, + type="response.completed", + response=Response( + id="chat-id", + created_at=10000000, + model="response-model-id", + object="response", + output=[], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + usage=ResponseUsage( + input_tokens=20, + input_tokens_details=InputTokensDetails( + cached_tokens=5, + ), + output_tokens=10, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=8, + ), + total_tokens=30, + ), + ), + ), +] + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_streaming_responses_api( + sentry_init, capture_events, send_default_pii, include_prompts +): + sentry_init( + integrations=[ + OpenAIIntegration( + include_prompts=include_prompts, + ) + ], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + client = OpenAI(api_key="z") + returned_stream = Stream(cast_to=None, response=None, client=client) + returned_stream._iterator = EXAMPLE_RESPONSES_STREAM + client.responses._post = mock.Mock(return_value=returned_stream) + + with start_transaction(name="openai tx"): + response_stream = client.responses.create( + model="some-model", + input="hello", + stream=True, + ) + + response_string = "" + for item in response_stream: + if hasattr(item, "delta"): + response_string += item.delta + + assert response_string == "hello world" + + (transaction,) = events + (span,) = transaction["spans"] + assert span["op"] == "gen_ai.responses" + + if send_default_pii and include_prompts: + assert span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == "hello" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == 
"hello world" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +async def test_streaming_responses_api_async( + sentry_init, capture_events, send_default_pii, include_prompts +): + sentry_init( + integrations=[ + OpenAIIntegration( + include_prompts=include_prompts, + ) + ], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + client = AsyncOpenAI(api_key="z") + returned_stream = AsyncStream(cast_to=None, response=None, client=client) + returned_stream._iterator = async_iterator(EXAMPLE_RESPONSES_STREAM) + client.responses._post = AsyncMock(return_value=returned_stream) + + with start_transaction(name="openai tx"): + response_stream = await client.responses.create( + model="some-model", + input="hello", + stream=True, + ) + + response_string = "" + async for item in response_stream: + if hasattr(item, "delta"): + response_string += item.delta + + assert response_string == "hello world" + + (transaction,) = events + (span,) = transaction["spans"] + assert span["op"] == "gen_ai.responses" + + if send_default_pii and include_prompts: + assert span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == "hello" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"]["gen_ai.usage.input_tokens"] == 20 + assert span["data"]["gen_ai.usage.output_tokens"] == 10 + assert span["data"]["gen_ai.usage.total_tokens"] == 30 From 2c5d1f09d09981438f86069f6fbe957d47d1dbe9 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Fri, 25 Jul 2025 15:00:59 +0200 Subject: [PATCH 16/27] import ordering --- tests/integrations/openai/test_openai.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 45d60d22d8..f028f4fca2 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -6,13 +6,13 @@ from openai.types.chat.chat_completion import Choice from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage -from openai.types.responses.response_text_delta_event import ResponseTextDeltaEvent -from openai.types.responses.response_created_event import ResponseCreatedEvent -from openai.types.responses.response_completed_event import ResponseCompletedEvent SKIP_RESPONSES_TESTS = False try: + from openai.types.responses.response_completed_event import ResponseCompletedEvent + from openai.types.responses.response_created_event import ResponseCreatedEvent + from openai.types.responses.response_text_delta_event import ResponseTextDeltaEvent from openai.types.responses.response_usage import ( InputTokensDetails, OutputTokensDetails, From f629b0d27627e6006a8f5f7c9ae42e6ff89ecc96 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Fri, 25 Jul 2025 15:08:57 +0200 Subject: [PATCH 17/27] tests --- tests/integrations/openai/test_openai.py | 131 ++++++++++++----------- 1 file 
From f629b0d27627e6006a8f5f7c9ae42e6ff89ecc96 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Fri, 25 Jul 2025 15:08:57 +0200
Subject: [PATCH 17/27] tests

---
 tests/integrations/openai/test_openai.py | 131 ++++++++++-----------
 1 file changed, 67 insertions(+), 64 deletions(-)

diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index f028f4fca2..b3626d4c43 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -1214,74 +1214,77 @@ async def test_error_in_responses_async_api(sentry_init, capture_events):
     )
 
 
-EXAMPLE_RESPONSES_STREAM = [
-    ResponseCreatedEvent(
-        sequence_number=1,
-        type="response.created",
-        response=Response(
-            id="chat-id",
-            created_at=10000000,
-            model="response-model-id",
-            object="response",
-            output=[],
-            parallel_tool_calls=False,
-            tool_choice="none",
-            tools=[],
+if SKIP_RESPONSES_TESTS:
+    EXAMPLE_RESPONSES_STREAM = []
+else:
+    EXAMPLE_RESPONSES_STREAM = [
+        ResponseCreatedEvent(
+            sequence_number=1,
+            type="response.created",
+            response=Response(
+                id="chat-id",
+                created_at=10000000,
+                model="response-model-id",
+                object="response",
+                output=[],
+                parallel_tool_calls=False,
+                tool_choice="none",
+                tools=[],
+            ),
         ),
-    ),
-    ResponseTextDeltaEvent(
-        item_id="msg_1",
-        sequence_number=2,
-        type="response.output_text.delta",
-        logprobs=[],
-        content_index=0,
-        output_index=0,
-        delta="hel",
-    ),
-    ResponseTextDeltaEvent(
-        item_id="msg_1",
-        sequence_number=3,
-        type="response.output_text.delta",
-        logprobs=[],
-        content_index=0,
-        output_index=0,
-        delta="lo ",
-    ),
-    ResponseTextDeltaEvent(
-        item_id="msg_1",
-        sequence_number=4,
-        type="response.output_text.delta",
-        logprobs=[],
-        content_index=0,
-        output_index=0,
-        delta="world",
-    ),
-    ResponseCompletedEvent(
-        sequence_number=5,
-        type="response.completed",
-        response=Response(
-            id="chat-id",
-            created_at=10000000,
-            model="response-model-id",
-            object="response",
-            output=[],
-            parallel_tool_calls=False,
-            tool_choice="none",
-            tools=[],
-            usage=ResponseUsage(
-                input_tokens=20,
-                input_tokens_details=InputTokensDetails(
-                    cached_tokens=5,
-                ),
-                output_tokens=10,
-                output_tokens_details=OutputTokensDetails(
-                    reasoning_tokens=8,
+        ResponseTextDeltaEvent(
+            item_id="msg_1",
+            sequence_number=2,
+            type="response.output_text.delta",
+            logprobs=[],
+            content_index=0,
+            output_index=0,
+            delta="hel",
+        ),
+        ResponseTextDeltaEvent(
+            item_id="msg_1",
+            sequence_number=3,
+            type="response.output_text.delta",
+            logprobs=[],
+            content_index=0,
+            output_index=0,
+            delta="lo ",
+        ),
+        ResponseTextDeltaEvent(
+            item_id="msg_1",
+            sequence_number=4,
+            type="response.output_text.delta",
+            logprobs=[],
+            content_index=0,
+            output_index=0,
+            delta="world",
+        ),
+        ResponseCompletedEvent(
+            sequence_number=5,
+            type="response.completed",
+            response=Response(
+                id="chat-id",
+                created_at=10000000,
+                model="response-model-id",
+                object="response",
+                output=[],
+                parallel_tool_calls=False,
+                tool_choice="none",
+                tools=[],
+                usage=ResponseUsage(
+                    input_tokens=20,
+                    input_tokens_details=InputTokensDetails(
+                        cached_tokens=5,
+                    ),
+                    output_tokens=10,
+                    output_tokens_details=OutputTokensDetails(
+                        reasoning_tokens=8,
+                    ),
+                    total_tokens=30,
                 ),
-                total_tokens=30,
             ),
         ),
-    ),
-]
+    ]

From d76a56341b60a87729e75b869ba47c512e3f09cf Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Fri, 25 Jul 2025 15:18:10 +0200
Subject: [PATCH 18/27] tests

---
 tests/integrations/openai/test_openai.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index b3626d4c43..fe282d0853 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -1291,6 +1291,7 @@ async def test_error_in_responses_async_api(sentry_init, capture_events):
     "send_default_pii, include_prompts",
     [(True, True), (True, False), (False, True), (False, False)],
 )
+@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")
 def test_streaming_responses_api(
     sentry_init, capture_events, send_default_pii, include_prompts
 ):
@@ -1345,6 +1346,7 @@ def test_streaming_responses_api(
     "send_default_pii, include_prompts",
     [(True, True), (True, False), (False, True), (False, False)],
 )
+@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")
 async def test_streaming_responses_api_async(
     sentry_init, capture_events, send_default_pii, include_prompts
 ):
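The guard introduced in patch 16 and consumed in patches 17 and 18 is easiest to read in one place; the net shape of the test module is roughly this abbreviated sketch (test body elided):

    import pytest

    SKIP_RESPONSES_TESTS = False

    try:
        # Only available in openai >= 1.66.0, where the Responses API was added.
        from openai.types.responses.response_completed_event import ResponseCompletedEvent
    except ImportError:
        SKIP_RESPONSES_TESTS = True

    @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")
    def test_streaming_responses_api():
        ...
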
From f9dfe5e7a6024e5c4a111bc2fb8788f544fe5ac6 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 28 Jul 2025 10:13:59 +0200
Subject: [PATCH 19/27] Rename pipeline name

---
 sentry_sdk/ai/monitoring.py | 6 +++---
 sentry_sdk/consts.py        | 7 +++++++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/sentry_sdk/ai/monitoring.py b/sentry_sdk/ai/monitoring.py
index 7a687736d0..e3f372c3ba 100644
--- a/sentry_sdk/ai/monitoring.py
+++ b/sentry_sdk/ai/monitoring.py
@@ -40,7 +40,7 @@ def sync_wrapped(*args, **kwargs):
                 for k, v in kwargs.pop("sentry_data", {}).items():
                     span.set_data(k, v)
                 if curr_pipeline:
-                    span.set_data(SPANDATA.AI_PIPELINE_NAME, curr_pipeline)
+                    span.set_data(SPANDATA.GEN_AI_PIPELINE_NAME, curr_pipeline)
                     return f(*args, **kwargs)
             else:
                 _ai_pipeline_name.set(description)
@@ -69,7 +69,7 @@ async def async_wrapped(*args, **kwargs):
                 for k, v in kwargs.pop("sentry_data", {}).items():
                     span.set_data(k, v)
                 if curr_pipeline:
-                    span.set_data(SPANDATA.AI_PIPELINE_NAME, curr_pipeline)
+                    span.set_data(SPANDATA.GEN_AI_PIPELINE_NAME, curr_pipeline)
                     return await f(*args, **kwargs)
             else:
                 _ai_pipeline_name.set(description)
@@ -108,7 +108,7 @@ def record_token_usage(
     # TODO: move pipeline name elsewhere
     ai_pipeline_name = get_ai_pipeline_name()
     if ai_pipeline_name:
-        span.set_data(SPANDATA.AI_PIPELINE_NAME, ai_pipeline_name)
+        span.set_data(SPANDATA.GEN_AI_PIPELINE_NAME, ai_pipeline_name)
 
     if input_tokens is not None:
         span.set_data(SPANDATA.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py
index 533aa9f815..9d8842ac9f 100644
--- a/sentry_sdk/consts.py
+++ b/sentry_sdk/consts.py
@@ -169,6 +169,7 @@ class SPANDATA:
     AI_PIPELINE_NAME = "ai.pipeline.name"
     """
     Name of the AI pipeline or chain being executed.
+    DEPRECATED: Use GEN_AI_PIPELINE_NAME instead.
     Example: "qa-pipeline"
     """
 
@@ -372,6 +373,12 @@ class SPANDATA:
     Example: "chat"
     """
 
+    GEN_AI_PIPELINE_NAME = "gen_ai.pipeline.name"
+    """
+    Name of the AI pipeline or chain being executed.
+    Example: "qa-pipeline"
+    """
+
     GEN_AI_RESPONSE_MODEL = "gen_ai.response.model"
     """
     Exact model identifier used to generate the response
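The renamed attribute is written by the SDK's ai_track decorator, which sets the current pipeline name for spans created underneath it. A minimal sketch of how gen_ai.pipeline.name ends up on child spans after this patch (the pipeline name and function are illustrative):

    import sentry_sdk
    from sentry_sdk.ai.monitoring import ai_track

    sentry_sdk.init(traces_sample_rate=1.0)

    @ai_track("qa-pipeline")
    def answer_question(question):
        # AI spans created in here (e.g. by the OpenAI integration) now carry
        # gen_ai.pipeline.name = "qa-pipeline"; ai.pipeline.name is deprecated.
        ...

    with sentry_sdk.start_transaction(name="qa"):
        answer_question("What is Sentry?")
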
From 14baf6cfc5c9efb4dbd39ca58932ea0d134f75d3 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 28 Jul 2025 10:15:36 +0200
Subject: [PATCH 20/27] Rename streaming attribute

---
 sentry_sdk/consts.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py
index 9d8842ac9f..a82ff94c49 100644
--- a/sentry_sdk/consts.py
+++ b/sentry_sdk/consts.py
@@ -230,6 +230,7 @@ class SPANDATA:
     AI_STREAMING = "ai.streaming"
     """
     Whether or not the AI model call's response was streamed back asynchronously
+    DEPRECATED: Use GEN_AI_RESPONSE_STREAMING instead.
     Example: true
     """
 
@@ -385,6 +386,12 @@ class SPANDATA:
     Example: gpt-4o-mini-2024-07-18
     """
 
+    GEN_AI_RESPONSE_STREAMING = "gen_ai.response.streaming"
+    """
+    Whether or not the AI model call's response was streamed back asynchronously
+    Example: true
+    """
+
     GEN_AI_RESPONSE_TEXT = "gen_ai.response.text"
     """
     The model's response text messages.

From f2cab2d1780f656c160cb4e1812faea6cad0484b Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 28 Jul 2025 12:09:10 +0200
Subject: [PATCH 21/27] Apply suggestions from code review

Co-authored-by: Ivana Kellyer
---
 sentry_sdk/integrations/openai.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index f234f67a6c..9520fdf16f 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -270,7 +270,7 @@ def new_iterator():
                         data_buf[0].append(x.delta or "")
 
                 # OpenAI responses API end of streaming response
-                if x.__class__.__name__ == "ResponseCompletedEvent":
+                if isinstance(x, ResponseCompletedEvent):
                     _calculate_token_usage(
                         messages,
                         x.response,
@@ -283,7 +283,7 @@ def new_iterator():
             yield x
 
         if len(data_buf) > 0:
-            all_responses = list(map(lambda chunk: "".join(chunk), data_buf))
+            all_responses = ["".join(chunk) for chunk in data_buf]
             if should_send_default_pii() and integration.include_prompts:
                 set_data_normalized(
                     span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
@@ -338,7 +338,7 @@ async def new_iterator_async():
             yield x
 
         if len(data_buf) > 0:
-            all_responses = list(map(lambda chunk: "".join(chunk), data_buf))
+            all_responses = ["".join(chunk) for chunk in data_buf]
             if should_send_default_pii() and integration.include_prompts:
                 set_data_normalized(
                     span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses

From d13a5d0d5bc4c5385051335a781b2d68b52d83b0 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 28 Jul 2025 12:10:52 +0200
Subject: [PATCH 22/27] review feedback

---
 sentry_sdk/integrations/openai.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 9520fdf16f..ab965ed706 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -32,6 +32,7 @@
 try:
     # responses API support was introduced in v1.66.0
     from openai.resources.responses import Responses, AsyncResponses
+    from openai.types.responses.response_completed_event import ResponseCompletedEvent
 except ImportError:
     RESPONSES_API_ENABLED = False
 
@@ -325,7 +326,7 @@ async def new_iterator_async():
                         data_buf[0].append(x.delta or "")
 
                 # OpenAI responses API end of streaming response
-                if x.__class__.__name__ == "ResponseCompletedEvent":
+                if isinstance(x, ResponseCompletedEvent):
                     _calculate_token_usage(
                         messages,
                         x.response,

From 157f95af0e2345bc42927d0f9a9441826050ebb6 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 28 Jul 2025 12:27:11 +0200
Subject: [PATCH 23/27] Review feedback

---
 sentry_sdk/integrations/openai.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index ab965ed706..9b43fac29a 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -100,7 +100,7 @@ def _get_usage(usage, names):
 def _calculate_token_usage(
     messages, response, span, streaming_message_responses, count_tokens
 ):
-    # type: (Iterable[ChatCompletionMessageParam], Any, Span, Optional[List[str]], Callable[..., Any]) -> None
+    # type: (Optional[Iterable[ChatCompletionMessageParam]], Any, Span, Optional[List[str]], Callable[..., Any]) -> None
     input_tokens = 0  # type: Optional[int]
     input_tokens_cached = 0  # type: Optional[int]
     output_tokens = 0  # type: Optional[int]
@@ -126,7 +126,7 @@ def _calculate_token_usage(
 
     # Manually count tokens
     if input_tokens == 0:
-        for message in messages:
+        for message in messages or []:
             if isinstance(message, dict) and "content" in message:
                 input_tokens += count_tokens(message["content"])
             elif isinstance(message, str):
@@ -161,8 +161,8 @@ def _calculate_token_usage(
 def _set_input_data(span, kwargs, operation, integration):
     # type: (Span, dict[str, Any], str, OpenAIIntegration) -> None
     # Input messages (the prompt or data sent to the model)
-    messages = kwargs.get("messages", [])
-    if messages == []:
+    messages = kwargs.get("messages")
+    if messages is None:
         messages = kwargs.get("input")
 
     if isinstance(messages, str):
@@ -196,7 +196,7 @@ def _set_input_data(span, kwargs, operation, integration):
         set_data_normalized(span, attribute, value)
 
     # Input attributes: Tools
-    tools = kwargs.get("tools", [])
+    tools = kwargs.get("tools")
     if tools is not None and len(tools) > 0:
         set_data_normalized(
             span, SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, safe_serialize(tools)
@@ -210,11 +210,11 @@ def _set_output_data(span, response, kwargs, integration, finish_span=True):
 
     # Input messages (the prompt or data sent to the model)
     # used for the token usage calculation
-    messages = kwargs.get("messages", [])
-    if messages == []:
+    messages = kwargs.get("messages")
+    if messages is None:
         messages = kwargs.get("input")
 
-    if isinstance(messages, str):
+    if messages is not None and isinstance(messages, str):
         messages = [messages]
 
     if hasattr(response, "choices"):
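Patch 23's None-handling is easier to see outside the diff; a condensed sketch of the normalization it converges on (the helper name is illustrative, not part of the patch):

    def _normalize_messages(kwargs):
        # Chat Completions passes "messages"; the Responses API passes "input".
        messages = kwargs.get("messages")
        if messages is None:
            messages = kwargs.get("input")
        # The Responses API accepts a bare string; wrap it so token counting
        # can always iterate.
        if isinstance(messages, str):
            messages = [messages]
        return messages  # may still be None; callers iterate over `messages or []`
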
From 0aa26ebf1adede7f3bc5b7e54c52f51af12f36d0 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 28 Jul 2025 12:47:12 +0200
Subject: [PATCH 24/27] resilience

---
 sentry_sdk/integrations/openai.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 9b43fac29a..72363590bb 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -271,7 +271,7 @@ def new_iterator():
                         data_buf[0].append(x.delta or "")
 
                 # OpenAI responses API end of streaming response
-                if isinstance(x, ResponseCompletedEvent):
+                if RESPONSES_API_ENABLED and isinstance(x, ResponseCompletedEvent):
                     _calculate_token_usage(
                         messages,
                         x.response,
@@ -326,7 +326,7 @@ async def new_iterator_async():
                         data_buf[0].append(x.delta or "")
 
                 # OpenAI responses API end of streaming response
-                if isinstance(x, ResponseCompletedEvent):
+                if RESPONSES_API_ENABLED and isinstance(x, ResponseCompletedEvent):
                     _calculate_token_usage(
                         messages,
                         x.response,
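Patches 22 and 24 combine into one defensive pattern for optionally available types; roughly (a sketch of the combined effect, not the full integration module):

    RESPONSES_API_ENABLED = True
    try:
        # Only present in openai >= 1.66.0.
        from openai.types.responses.response_completed_event import ResponseCompletedEvent
    except ImportError:
        RESPONSES_API_ENABLED = False

    def _is_responses_stream_end(event):
        # Checking the flag first keeps isinstance() from raising NameError on
        # older openai versions where the import above failed.
        return RESPONSES_API_ENABLED and isinstance(event, ResponseCompletedEvent)
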
From e04174e4d26b66621b61676da4d6632534adb43a Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 28 Jul 2025 12:56:32 +0200
Subject: [PATCH 25/27] streaming

---
 sentry_sdk/integrations/openai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index f234f67a6c..00875787bc 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -182,7 +182,7 @@ def _set_input_data(span, kwargs, operation, integration):
     # Input attributes: Optional
     kwargs_keys_to_attributes = {
         "model": SPANDATA.GEN_AI_REQUEST_MODEL,
-        "stream": SPANDATA.AI_STREAMING,
+        "stream": SPANDATA.GEN_AI_RESPONSE_STREAMING,
         "max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS,
         "presence_penalty": SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY,
         "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY,

From eb629de7fccd644044faa000261e5cad906a0e2f Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 28 Jul 2025 13:42:25 +0200
Subject: [PATCH 26/27] do not calculate token usage twice

---
 sentry_sdk/integrations/openai.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 72363590bb..98e83607dd 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -226,6 +226,7 @@ def _set_output_data(span, response, kwargs, integration, finish_span=True):
                     SPANDATA.GEN_AI_RESPONSE_TEXT,
                     safe_serialize(response_text),
                 )
+        _calculate_token_usage(messages, response, span, None, integration.count_tokens)
         if finish_span:
             span.__exit__(None, None, None)
@@ -238,6 +239,7 @@ def _set_output_data(span, response, kwargs, integration, finish_span=True):
                     SPANDATA.GEN_AI_RESPONSE_TEXT,
                     safe_serialize(response_text),
                 )
+        _calculate_token_usage(messages, response, span, None, integration.count_tokens)
         if finish_span:
             span.__exit__(None, None, None)
@@ -360,11 +362,10 @@ async def new_iterator_async():
         else:
             response._iterator = new_iterator()
     else:
+        _calculate_token_usage(messages, response, span, None, integration.count_tokens)
         if finish_span:
             span.__exit__(None, None, None)
 
-    _calculate_token_usage(messages, response, span, None, integration.count_tokens)
-
 
 def _new_chat_completion_common(f, *args, **kwargs):
     # type: (Any, Any, Any) -> Any

From 1f4ed0cd242628ebe51e37ead42d9f375e81d2d6 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 28 Jul 2025 13:54:09 +0200
Subject: [PATCH 27/27] updated test

---
 tests/integrations/openai/test_openai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index fe282d0853..dfac08d762 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -1159,11 +1159,11 @@ async def test_ai_client_span_streaming_responses_async_api(
     assert spans[0]["op"] == "gen_ai.responses"
     assert spans[0]["origin"] == "auto.ai.openai"
     assert spans[0]["data"] == {
-        "ai.streaming": True,
         "gen_ai.operation.name": "responses",
         "gen_ai.request.messages": "How do I check if a Python object is an instance of a class?",
         "gen_ai.request.model": "gpt-4o",
         "gen_ai.response.model": "response-model-id",
+        "gen_ai.response.streaming": True,
         "gen_ai.system": "openai",
         "gen_ai.usage.input_tokens": 20,
         "gen_ai.usage.input_tokens.cached": 5,