From c056d6fb0c075f86b9526f12929c27c137113179 Mon Sep 17 00:00:00 2001
From: saivedant169 <saivedant169@gmail.com>
Date: Wed, 8 Apr 2026 23:43:53 -0400
Subject: [PATCH 1/3] fix(langchain): detach existing SpanHolder token before
 overwrite in _create_llm_span

Closes #3957

When _create_llm_span() is called for a run_id that already has an entry
in self.spans, the old holder's token was lost without being detached,
leaving an orphaned context_api.attach() on the OTel context stack. This
is the same class of bug as #3526 / #3807.

Defensively detach the existing holder's token before replacing the entry.
---
 .../instrumentation/langchain/callback_handler.py          | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/callback_handler.py b/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/callback_handler.py
index 28315b7f10..379cd7481c 100644
--- a/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/callback_handler.py
+++ b/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/callback_handler.py
@@ -472,6 +472,13 @@ def _create_llm_span(
             # If context setting fails, continue without suppression token
             token = None
 
+        # Detach any existing holder's token before overwriting, otherwise
+        # the previous context_api.attach() is orphaned and corrupts the
+        # OTel context stack (same class of bug as #3526 / #3807).
+        existing_holder = self.spans.get(run_id)
+        if existing_holder is not None and existing_holder.token is not None:
+            self._safe_detach_context(existing_holder.token)
+
         self.spans[run_id] = SpanHolder(
             span, token, None, [], workflow_name, None, entity_path
         )

From 39bc0d90d5adccefc998df61c9dcce44707a783a Mon Sep 17 00:00:00 2001
From: saivedant169 <saivedant169@gmail.com>
Date: Sun, 12 Apr 2026 10:48:45 -0400
Subject: [PATCH 2/3] fix: detach old token before creating suppression to
 preserve context order
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Moved the existing holder detach before the suppression token attach.
The previous ordering detached after the suppression was already on the
context stack, which caused ContextVar.reset() to restore context to
before the span — wiping the suppression flag and producing duplicate
downstream spans (openai.chat, bedrock.chat, etc.).

With the corrected order:
1. Detach the orphaned span-context token (clean slate)
2. Attach the suppression token on top of the clean context
3. Store the new SpanHolder with the suppression token
---
 .../instrumentation/langchain/callback_handler.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/callback_handler.py b/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/callback_handler.py
index 379cd7481c..e7f5ac8da6 100644
--- a/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/callback_handler.py
+++ b/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/callback_handler.py
@@ -462,6 +462,14 @@ def _create_llm_span(
             span, GenAIAttributes.GEN_AI_OPERATION_NAME, operation_name
         )
 
+        # Detach any existing holder's token before creating the suppression
+        # token. The ordering matters: if we detach after attaching the
+        # suppression, ContextVar.reset() restores context to before the span,
+        # wiping the suppression flag and causing duplicate downstream spans.
+        existing_holder = self.spans.get(run_id)
+        if existing_holder is not None and existing_holder.token is not None:
+            self._safe_detach_context(existing_holder.token)
+
         # we already have an LLM span by this point,
         # so skip any downstream instrumentation from here
         try:
@@ -472,13 +480,6 @@ def _create_llm_span(
             # If context setting fails, continue without suppression token
             token = None
 
-        # Detach any existing holder's token before overwriting, otherwise
-        # the previous context_api.attach() is orphaned and corrupts the
-        # OTel context stack (same class of bug as #3526 / #3807).
-        existing_holder = self.spans.get(run_id)
-        if existing_holder is not None and existing_holder.token is not None:
-            self._safe_detach_context(existing_holder.token)
-
         self.spans[run_id] = SpanHolder(
             span, token, None, [], workflow_name, None, entity_path
         )

From abadc3974344051109b1b1cfcb9245804f8f5595 Mon Sep 17 00:00:00 2001
From: saivedant169 <saivedant169@gmail.com>
Date: Mon, 13 Apr 2026 20:59:56 -0400
Subject: [PATCH 3/3] fix: detach duplicate run_id token before _create_span
 overwrites holder
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

_create_span unconditionally overwrites self.spans[run_id] with a new
SpanHolder. When _create_llm_span is called twice with the same run_id,
the old holder — along with its suppression token — is gone from
self.spans before the existing detach logic can read it, leaking
supp_token_1 forever.

Moved the old-holder detach to run before _create_span() is called,
so the original suppression token is properly cleaned up. A second
detach remains after _create_span to handle the span_token that
_create_span just attached (otherwise the suppression would layer on
top of the span context instead of the baseline).

Added tests/test_context_token_lifecycle.py covering:
- Suppression active after _create_llm_span
- Suppression cleared after _end_span
- Duplicate run_id no longer leaks supp_token_1 (regression for #3957)

Fully closes #3957.
---
 .../langchain/callback_handler.py             |  25 ++-
 .../tests/test_context_token_lifecycle.py     | 173 ++++++++++++++++++
 2 files changed, 191 insertions(+), 7 deletions(-)
 create mode 100644 packages/opentelemetry-instrumentation-langchain/tests/test_context_token_lifecycle.py

diff --git a/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/callback_handler.py b/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/callback_handler.py
index e7f5ac8da6..f3d41f0625 100644
--- a/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/callback_handler.py
+++ b/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/callback_handler.py
@@ -438,6 +438,16 @@ def _create_llm_span(
         workflow_name = self.get_workflow_name(parent_run_id)
         entity_path = self.get_entity_path(parent_run_id)
 
+        # Capture and detach the old holder BEFORE _create_span runs.
+        # _create_span unconditionally overwrites self.spans[run_id], which
+        # would make the old SpanHolder (and its suppression token) unreachable.
+        # Detaching here, before the overwrite, ensures we clean up the original
+        # suppression token from a duplicate run_id and that the new suppression
+        # is layered on top of the clean baseline context.
+        old_holder = self.spans.get(run_id)
+        if old_holder is not None and old_holder.token is not None:
+            self._safe_detach_context(old_holder.token)
+
         span = self._create_span(
             run_id,
             parent_run_id,
@@ -462,13 +472,14 @@ def _create_llm_span(
             span, GenAIAttributes.GEN_AI_OPERATION_NAME, operation_name
         )
 
-        # Detach any existing holder's token before creating the suppression
-        # token. The ordering matters: if we detach after attaching the
-        # suppression, ContextVar.reset() restores context to before the span,
-        # wiping the suppression flag and causing duplicate downstream spans.
-        existing_holder = self.spans.get(run_id)
-        if existing_holder is not None and existing_holder.token is not None:
-            self._safe_detach_context(existing_holder.token)
+        # _create_span has already attached the span context and stored a new
+        # SpanHolder(span, span_token, ...). Detach that span_token before
+        # attaching suppression so the suppression layers on top of the clean
+        # baseline context. If we detached after attaching suppression,
+        # ContextVar.reset() would wipe the suppression flag.
+        current_holder = self.spans.get(run_id)
+        if current_holder is not None and current_holder.token is not None:
+            self._safe_detach_context(current_holder.token)
 
         # we already have an LLM span by this point,
         # so skip any downstream instrumentation from here
diff --git a/packages/opentelemetry-instrumentation-langchain/tests/test_context_token_lifecycle.py b/packages/opentelemetry-instrumentation-langchain/tests/test_context_token_lifecycle.py
new file mode 100644
index 0000000000..b60b653a7f
--- /dev/null
+++ b/packages/opentelemetry-instrumentation-langchain/tests/test_context_token_lifecycle.py
@@ -0,0 +1,173 @@
+"""
+Unit tests for TraceloopCallbackHandler OTel context-token lifecycle.
+
+These tests do NOT use VCR cassettes; they exercise context-management
+logic in isolation by calling internal methods directly.
+
+No real HTTP is performed — the Tracer is backed by an InMemorySpanExporter.
+"""
+from unittest.mock import MagicMock
+from uuid import uuid4
+
+import pytest
+from opentelemetry import context as context_api
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
+from opentelemetry.semconv_ai import (
+    SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
+    LLMRequestTypeValues,
+)
+
+from opentelemetry.instrumentation.langchain.callback_handler import (
+    TraceloopCallbackHandler,
+)
+
+
+@pytest.fixture
+def handler():
+    exporter = InMemorySpanExporter()
+    provider = TracerProvider()
+    provider.add_span_processor(SimpleSpanProcessor(exporter))
+    tracer = provider.get_tracer("test")
+    return TraceloopCallbackHandler(
+        tracer=tracer,
+        duration_histogram=MagicMock(),
+        token_histogram=MagicMock(),
+    )
+
+
+@pytest.fixture(autouse=True)
+def restore_otel_context():
+    """
+    Snapshot the OTel context before each test and restore it afterwards.
+
+    context_api.attach(ctx) stores the *current* context in the returned token.
+    context_api.detach(token) calls ContextVar.reset(token), which unconditionally
+    resets the ContextVar to the snapshot saved in the token — regardless of any
+    intermediate attaches that happened while the test ran.  This ensures leaked
+    tokens from a failing test cannot pollute subsequent tests.
+    """
+    restore_token = context_api.attach(context_api.get_current())
+    yield
+    context_api.detach(restore_token)
+
+
+def _suppression_active() -> bool:
+    return context_api.get_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY) is True
+
+
+# ---------------------------------------------------------------------------
+# Ordering fix (commit 2) — normal single-call lifecycle
+# ---------------------------------------------------------------------------
+
+def test_suppression_active_after_create_llm_span(handler):
+    """After a normal _create_llm_span call the suppression flag must be set."""
+    run_id = uuid4()
+    assert not _suppression_active(), "precondition: no suppression before call"
+
+    handler._create_llm_span(run_id, None, "gpt-4", LLMRequestTypeValues.CHAT)
+
+    assert _suppression_active(), (
+        "Suppression must be active so downstream OpenAI/Bedrock instrumentation "
+        "is skipped for this LLM call."
+    )
+
+    span = handler.spans[run_id].span
+    handler._end_span(span, run_id)
+
+
+def test_suppression_cleared_after_end_span(handler):
+    """After _end_span the suppression flag must be gone (normal single-call lifecycle)."""
+    run_id = uuid4()
+    handler._create_llm_span(run_id, None, "gpt-4", LLMRequestTypeValues.CHAT)
+
+    span = handler.spans[run_id].span
+    handler._end_span(span, run_id)
+
+    assert not _suppression_active(), (
+        "Suppression must be cleared when the LLM span ends; otherwise every "
+        "subsequent call in this thread/task is permanently suppressed."
+    )
+
+
+# ---------------------------------------------------------------------------
+# P2 — duplicate run_id leaks supp_token_1 (issue #3957)
+# ---------------------------------------------------------------------------
+
+def test_duplicate_run_id_leaks_suppression_token(handler):
+    """
+    Regression test for issue #3957.
+
+    When _create_llm_span is called a second time with the same run_id,
+    _create_span (called first inside _create_llm_span) unconditionally
+    overwrites self.spans[run_id] before _create_llm_span can read and
+    detach the old SpanHolder's suppression token.
+
+    Context-stack trace (each arrow = context_api.attach / detach)
+    ---------------------------------------------------------------
+    ctx_0            baseline (no suppression)
+      ↓ _create_span (1st call): attach span_ctx_1
+    span_ctx_1       {SPAN_KEY: span_1}          span_token_1 remembers ctx_0
+      ↓ detach span_token_1  (commit-2 ordering fix)
+    ctx_0
+      ↓ attach suppression
+    supp_ctx_1       {SUPPRESS: True}             supp_token_1 remembers ctx_0
+                     ← stored in SpanHolder; 1st _create_llm_span returns
+
+      ↓ _create_span (2nd call, same run_id!): attach span_ctx_2
+    span_ctx_2       {SUPPRESS:True, SPAN_KEY: span_2}  span_token_2 remembers supp_ctx_1
+                     _create_span writes self.spans[run_id] = SpanHolder(span_2, span_token_2)
+                     supp_token_1 is now GONE from self.spans — unreachable forever
+      ↓ _create_llm_span reads existing_holder → finds SpanHolder(span_2, span_token_2)
+      ↓ detach span_token_2
+    supp_ctx_1       {SUPPRESS: True}             NOT ctx_0 — 1st suppression lives on
+      ↓ attach new suppression
+    supp_ctx_2       {SUPPRESS: True}             supp_token_2 remembers supp_ctx_1
+                     stored in SpanHolder; 2nd _create_llm_span returns
+
+      ↓ _end_span: detach supp_token_2
+    supp_ctx_1       {SUPPRESS: True}             ← BUG: supp_token_1 never detached
+
+    Expected after _end_span: ctx_0 (no suppression)
+    Actual   after _end_span: supp_ctx_1 ({SUPPRESS: True})
+
+    Root cause
+    ----------
+    The fix must capture `existing_holder = self.spans.get(run_id)` BEFORE
+    calling `_create_span`, because _create_span unconditionally overwrites
+    self.spans[run_id] at line 334, destroying the reference to supp_token_1.
+
+    Correct fix in _create_llm_span (before the _create_span call):
+
+        old_holder = self.spans.get(run_id)
+        if old_holder is not None and old_holder.token is not None:
+            self._safe_detach_context(old_holder.token)
+
+        span = self._create_span(run_id, ...)   # now safe to overwrite
+    """
+    run_id = uuid4()
+    assert not _suppression_active(), "precondition: no suppression before test"
+
+    # First call — normal; establishes supp_token_1
+    handler._create_llm_span(run_id, None, "gpt-4", LLMRequestTypeValues.CHAT)
+    assert _suppression_active(), "suppression active after 1st call (sanity)"
+
+    # Second call with the SAME run_id — triggers issue #3957
+    handler._create_llm_span(run_id, None, "gpt-4", LLMRequestTypeValues.CHAT)
+    assert _suppression_active(), "suppression active after 2nd call (sanity)"
+
+    # End the one surviving span (from the 2nd call)
+    span = handler.spans[run_id].span
+    handler._end_span(span, run_id)
+
+    # After all spans are done the suppression MUST be cleared.
+    # P2 bug present → still True  (supp_token_1 leaked, never detached)
+    # P2 bug fixed   → None        (both tokens properly cleaned up)
+    assert not _suppression_active(), (
+        "Issue #3957 not fixed: suppression flag is still set after _end_span. "
+        "supp_token_1 from the first _create_llm_span call was never detached because "
+        "_create_span overwrites self.spans[run_id] before the old holder can be read. "
+        "Fix: move `existing_holder = self.spans.get(run_id)` (and its detach) to "
+        "BEFORE the `_create_span(...)` call in _create_llm_span."
+    )