From 95b95939556b7c0946b128b06ee6b35e1392d54d Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 24 Apr 2026 02:37:56 +0000
Subject: [PATCH 01/10] ci(fidelity): ratchet-down baseline + wire into
 lint.yml (closes #53)

`scripts/verify_test_fidelity.py` was only ever run by hand, never from
CI, so missing translations of upstream TS tests drifted undetected
(16 gaps at 4.26.0, 40 against upstream main). Now:

- Default mode is baseline-enforced via `scripts/fidelity_baseline.json`.
  CI fails on new misses outside the baseline; accepts baselined misses
  as-is. Fixed misses are reported with a nudge to tighten the baseline.
- `--strict` ignores the baseline (eventual target once baseline hits 0).
- `--update-baseline` regenerates the baseline file.
- Initial baseline captures the 16 `[post with Plan]` gaps tracked in #55,
  pinned to `chat@4.26.0`.
- lint.yml clones `vercel/chat@4.26.0` and runs the check; aggregates into
  the existing "fail if any step failed" gate.

CLAUDE.md + docs/UPSTREAM_SYNC.md document the workflow.

https://claude.ai/code/session_01WhrgpELQJJSakBnwSNuwGJ
---
 .github/workflows/lint.yml      |  18 ++++-
 CLAUDE.md                       |   7 +-
 docs/UPSTREAM_SYNC.md           |  26 +++++++
 scripts/fidelity_baseline.json  |  74 +++++++++++++++++++
 scripts/verify_test_fidelity.py | 122 ++++++++++++++++++++++++++++++--
 5 files changed, 238 insertions(+), 9 deletions(-)
 create mode 100644 scripts/fidelity_baseline.json

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index d6f0771..2287b3f 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -57,6 +57,20 @@ jobs:
         continue-on-error: true
         run: uv run python scripts/audit_test_quality.py
 
+      - name: Clone upstream vercel/chat at pinned parity tag
+        id: clone_upstream
+        continue-on-error: true
+        run: |
+          git clone --depth 1 --branch chat@4.26.0 \
+            https://github.com/vercel/chat.git /tmp/vercel-chat
+
+      - name: Test fidelity check (baseline-enforced)
+        id: fidelity
+        continue-on-error: true
+        env:
+          TS_ROOT: /tmp/vercel-chat
+        run: uv run python scripts/verify_test_fidelity.py
+
       - name: Pyrefly type check
         id: pyrefly
         continue-on-error: true
@@ -75,6 +89,7 @@ jobs:
           echo "| Ruff check | ${{ steps.ruff_check.outcome }} |" >> $GITHUB_STEP_SUMMARY
           echo "| Ruff format | ${{ steps.ruff_format.outcome }} |" >> $GITHUB_STEP_SUMMARY
           echo "| Test audit | ${{ steps.audit.outcome }} |" >> $GITHUB_STEP_SUMMARY
+          echo "| Test fidelity | ${{ steps.fidelity.outcome }} |" >> $GITHUB_STEP_SUMMARY
           echo "| Pyrefly | ${{ steps.pyrefly.outcome }} |" >> $GITHUB_STEP_SUMMARY
           echo "" >> $GITHUB_STEP_SUMMARY
           if [ "${{ steps.pyrefly.outcome }}" = "success" ]; then
@@ -89,10 +104,11 @@ jobs:
           RUFF_CHECK: ${{ steps.ruff_check.outcome }}
           RUFF_FORMAT: ${{ steps.ruff_format.outcome }}
           AUDIT: ${{ steps.audit.outcome }}
+          FIDELITY: ${{ steps.fidelity.outcome }}
           PYREFLY: ${{ steps.pyrefly.outcome }}
         run: |
           failures=0
-          for var in RUFF_CHECK RUFF_FORMAT AUDIT PYREFLY; do
+          for var in RUFF_CHECK RUFF_FORMAT AUDIT FIDELITY PYREFLY; do
             outcome="${!var}"
             if [ "$outcome" != "success" ]; then
               echo "$var failed (outcome: $outcome)"
diff --git a/CLAUDE.md b/CLAUDE.md
index f838b86..6b7928e 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -105,5 +105,8 @@ async mock bugs, and cross-file duplicates. PRs that introduce hard failures
 will not pass CI.
 
 **Fidelity check** (`scripts/verify_test_fidelity.py`) verifies every TS
-`it("...")` has a matching Python `def test_*()`. Must show 0 missing before
-committing test changes.
+`it("...")` has a matching Python `def test_*()`, pinned to `chat@4.26.0`.
+Default mode is baseline-enforced: CI fails on any NEW miss not listed in
+`scripts/fidelity_baseline.json`. Run `--update-baseline` after porting a
+missing test (or documenting an intentional skip in `UPSTREAM_SYNC.md`). Use
+`--strict` to verify the final 0-missing target locally.
diff --git a/docs/UPSTREAM_SYNC.md b/docs/UPSTREAM_SYNC.md
index 1d60574..d0400d1 100644
--- a/docs/UPSTREAM_SYNC.md
+++ b/docs/UPSTREAM_SYNC.md
@@ -72,6 +72,32 @@ tests. If upstream tests lock in inconsistent behavior, choose one of:
 - **Preserve parity** and document the inconsistency in the non-parity section below
 - **Intentionally diverge** and document the divergence in the non-parity section
 
+### Test fidelity baseline
+
+`scripts/verify_test_fidelity.py` runs in CI (`.github/workflows/lint.yml`) pinned
+to `vercel/chat@4.26.0` (matches the `UPSTREAM_PARITY` constant in
+`src/chat_sdk/__init__.py`). Default mode is **baseline-enforced**:
+
+- The current set of missing TS-test translations lives in
+  `scripts/fidelity_baseline.json`.
+- CI fails if a TS test is missing that is **not** in the baseline (new drift).
+- CI succeeds if all currently-missing tests are a subset of the baseline — even
+  if nothing has been ported yet.
+- Fixed tests (in baseline but now ported) are reported with a reminder to
+  tighten the baseline.
+
+Workflows:
+
+| Goal | Command |
+|------|---------|
+| Port a missing test | Write the Python test, then `--update-baseline` to remove it from the ratchet |
+| Add a Python-only divergence (intentional skip) | Document in [Known Non-Parity](#known-non-parity-with-typescript-sdk), then `--update-baseline` |
+| Upstream sync | After pulling new upstream, run default mode — any newly-added TS tests appear as NEW misses and CI fails until ported or baselined |
+| Final parity check | `--strict` ignores the baseline and fails on any missing — target once baseline hits zero |
+
+The baseline file is ordered and stable so diffs are easy to review. Regenerate
+it whenever the missing set changes — don't hand-edit.
+
 ## Divergence Policy
 
 Every divergence from upstream has a cost: merge conflicts on future syncs,
diff --git a/scripts/fidelity_baseline.json b/scripts/fidelity_baseline.json
new file mode 100644
index 0000000..a776e2b
--- /dev/null
+++ b/scripts/fidelity_baseline.json
@@ -0,0 +1,74 @@
+{
+  "_comment": "Ratchet-down baseline for scripts/verify_test_fidelity.py. Each entry is a [describe_block, ts_it_name] pair that is known to be missing a Python translation. Default CI mode accepts any subset of this list as missing and fails on new misses outside it. To remove entries: port the TS test to its Python counterpart, then regenerate this file with --update-baseline.",
+  "ts_parity": "chat@4.26.0",
+  "total_ts_tests": 564,
+  "total_missing": 16,
+  "missing": {
+    "packages/chat/src/thread.test.ts": [
+      [
+        "post with Plan",
+        "should add tasks and call editObject"
+      ],
+      [
+        "post with Plan",
+        "should call adapter postObject when supported"
+      ],
+      [
+        "post with Plan",
+        "should complete plan and mark tasks done"
+      ],
+      [
+        "post with Plan",
+        "should complete plan via editMessage in fallback mode"
+      ],
+      [
+        "post with Plan",
+        "should continue accepting edits after a failed edit"
+      ],
+      [
+        "post with Plan",
+        "should ensure sequential edits via queue"
+      ],
+      [
+        "post with Plan",
+        "should handle various PlanContent formats in initialMessage"
+      ],
+      [
+        "post with Plan",
+        "should post fallback text when adapter does not support plans"
+      ],
+      [
+        "post with Plan",
+        "should propagate editObject errors from addTask"
+      ],
+      [
+        "post with Plan",
+        "should reset plan and start fresh"
+      ],
+      [
+        "post with Plan",
+        "should return null when calling addTask before post"
+      ],
+      [
+        "post with Plan",
+        "should return null when calling complete before post"
+      ],
+      [
+        "post with Plan",
+        "should return null when calling updateTask before post"
+      ],
+      [
+        "post with Plan",
+        "should set error status via updateTask"
+      ],
+      [
+        "post with Plan",
+        "should update current task with output"
+      ],
+      [
+        "post with Plan",
+        "should update via editMessage in fallback mode"
+      ]
+    ]
+  }
+}
diff --git a/scripts/verify_test_fidelity.py b/scripts/verify_test_fidelity.py
index 6bb81b7..32d16c6 100644
--- a/scripts/verify_test_fidelity.py
+++ b/scripts/verify_test_fidelity.py
@@ -6,11 +6,20 @@
 Python translation.
 
 Usage:
-    python scripts/verify_test_fidelity.py [--fix]
+    python scripts/verify_test_fidelity.py             # baseline mode (default)
+    python scripts/verify_test_fidelity.py --strict    # fail on any missing
+    python scripts/verify_test_fidelity.py --fix       # append stubs for missing
+    python scripts/verify_test_fidelity.py --update-baseline  # rewrite baseline
 
-With --fix: appends stub test functions for any missing translations.
+Default (baseline) mode: succeeds iff the set of missing tests is a subset of
+``scripts/fidelity_baseline.json``. Tests that are in the baseline but now pass
+are reported as fixed. New misses outside the baseline fail CI.
+
+``--strict`` ignores the baseline and fails on any missing. This is the
+eventual target once the baseline count ratchets to zero.
 """
 
+import json
 import os
 import re
 import sys
@@ -18,6 +27,7 @@
 
 TS_ROOT = os.environ.get("TS_ROOT", "/tmp/vercel-chat")
 PY_ROOT = os.environ.get("PY_ROOT", str(Path(__file__).parent.parent))
+BASELINE_PATH = Path(__file__).parent / "fidelity_baseline.json"
 
 # Mapping: TS test file -> Python test file
 MAPPING = {
@@ -205,15 +215,66 @@ def count_absorbers(py_path: str) -> int:
     return count
 
 
+def load_baseline(path: Path) -> dict[str, set[tuple[str, str]]]:
+    """Load fidelity baseline. Missing file returns empty baseline."""
+    if not path.exists():
+        return {}
+    with open(path) as f:
+        data = json.load(f)
+    out: dict[str, set[tuple[str, str]]] = {}
+    for ts_rel, entries in data.get("missing", {}).items():
+        out[ts_rel] = {(e[0], e[1]) for e in entries}
+    return out
+
+
+def write_baseline(path: Path, all_missing: dict[str, list], total_ts: int) -> None:
+    """Persist the current set of missing tests as the new baseline."""
+    payload = {
+        "_comment": (
+            "Ratchet-down baseline for scripts/verify_test_fidelity.py. "
+            "Each entry is a [describe_block, ts_it_name] pair that is known "
+            "to be missing a Python translation. Default CI mode accepts any "
+            "subset of this list as missing and fails on new misses outside "
+            "it. To remove entries: port the TS test to its Python counterpart, "
+            "then regenerate this file with --update-baseline."
+        ),
+        "ts_parity": "chat@4.26.0",
+        "total_ts_tests": total_ts,
+        "total_missing": sum(len(v) for v in all_missing.values()),
+        "missing": {
+            ts_rel: [[d, t] for d, t, _p in sorted(entries, key=lambda e: (e[0], e[1]))]
+            for ts_rel, entries in sorted(all_missing.items())
+            if entries
+        },
+    }
+    with open(path, "w") as f:
+        json.dump(payload, f, indent=2, sort_keys=False)
+        f.write("\n")
+
+
 def main() -> int:
     fix_mode = "--fix" in sys.argv
+    strict_mode = "--strict" in sys.argv
+    update_baseline = "--update-baseline" in sys.argv
+
+    baseline = {} if (strict_mode or update_baseline) else load_baseline(BASELINE_PATH)
+
     total_missing = 0
     total_matched = 0
     total_ts = 0
     total_absorbers = 0
+    all_missing: dict[str, list] = {}
+    new_misses: dict[str, list[tuple[str, str]]] = {}
+    fixed: dict[str, list[tuple[str, str]]] = {}
 
     print("=" * 70)
     print("TEST FIDELITY REPORT")
+    if strict_mode:
+        print("  mode: --strict (baseline ignored)")
+    elif update_baseline:
+        print("  mode: --update-baseline (rewriting baseline)")
+    else:
+        print(f"  mode: baseline ({BASELINE_PATH.name})")
     print("=" * 70)
 
     for ts_rel, py_rel in MAPPING.items():
@@ -231,6 +292,16 @@ def main() -> int:
         total_matched += matched
         total_missing += len(missing)
         total_absorbers += absorbers
+        all_missing[ts_rel] = missing
+
+        current_missing_keys = {(d, t) for d, t, _p in missing}
+        baseline_keys = baseline.get(ts_rel, set())
+        file_new = sorted(current_missing_keys - baseline_keys)
+        file_fixed = sorted(baseline_keys - current_missing_keys)
+        if file_new:
+            new_misses[ts_rel] = file_new
+        if file_fixed:
+            fixed[ts_rel] = file_fixed
 
         absorber_note = f" ({absorbers} absorbers)" if absorbers else ""
         status = "OK" if not missing else f"GAPS ({len(missing)})"
@@ -243,7 +314,8 @@ def main() -> int:
 
         if missing:
             for describe, ts_name, _py_name in missing[:5]:
-                print(f"    MISSING: [{describe}] {ts_name}")
+                marker = "NEW" if (describe, ts_name) in set(file_new) else "baselined"
+                print(f"    MISSING ({marker}): [{describe}] {ts_name}")
             if len(missing) > 5:
                 print(f"    ... and {len(missing) - 5} more")
 
@@ -272,10 +344,48 @@ def main() -> int:
     else:
         print(f"TOTAL: {total_matched}/{total_ts} matched ({pct}%), {total_missing} missing")
 
-    if total_missing > 0:
-        print("\nRun with --fix to generate stubs for missing tests.")
+    if update_baseline:
+        write_baseline(BASELINE_PATH, all_missing, total_ts)
+        print(f"\nBaseline written to {BASELINE_PATH}")
+        print(f"  {total_missing} missing tests baselined across {sum(1 for v in all_missing.values() if v)} files")
+        return 0
+
+    if total_missing == 0:
+        print("\nAll TS tests have Python equivalents.")
+        if any(baseline.values()):
+            print("Baseline is stale — run with --update-baseline to clear it.")
+        return 0
+
+    if strict_mode:
+        print(f"\n{total_missing} missing (strict mode — baseline ignored).")
+        print("Run with --fix to generate stubs for missing tests.")
         return 1
-    print("\nAll TS tests have Python equivalents.")
+
+    if new_misses:
+        new_count = sum(len(v) for v in new_misses.values())
+        print(f"\n{new_count} NEW miss(es) outside the baseline:")
+        for ts_rel, entries in new_misses.items():
+            for describe, ts_name in entries:
+                print(f"  - {ts_rel} :: [{describe}] {ts_name}")
+        print("\nOptions:")
+        print("  1. Port the missing TS test(s) to the matching Python file")
+        print("  2. If intentional divergence, document in docs/UPSTREAM_SYNC.md")
+        print("     and re-baseline with --update-baseline")
+        print("\nRun with --fix to generate Python stubs for missing tests.")
+        return 1
+
+    if fixed:
+        fixed_count = sum(len(v) for v in fixed.values())
+        print(f"\n✓ {fixed_count} test(s) fixed since baseline (no longer missing):")
+        for _ts_rel, entries in fixed.items():
+            for describe, ts_name in entries[:5]:
+                print(f"    - [{describe}] {ts_name}")
+            if len(entries) > 5:
+                print(f"    ... and {len(entries) - 5} more")
+        print("\nRun with --update-baseline to tighten the baseline.")
+
+    baseline_total = sum(len(v) for v in baseline.values())
+    print(f"\n{total_missing}/{baseline_total} baseline miss(es) still present — no new drift.")
     return 0
 
 

From 0c7006f5baf5e38faa2fd915c809d4c6d0b73973 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 24 Apr 2026 04:50:32 +0000
Subject: [PATCH 02/10] chore(pypi): add keywords + broaden classifiers for
 discoverability
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PyPI search has no signal today — keywords field was absent, classifiers
covered only Chat + Typed. Added 12 keywords covering high-volume search
shapes (generic, per-platform, asyncio, vercel) and 4 classifiers
(Communications parent, Internet, Libraries :: Application Frameworks,
Libraries :: Python Modules) matching patterns used by slack-sdk,
python-telegram-bot, discord.py, errbot.

No runtime change; the hatchling build differs only in the package
metadata it emits (keywords and classifiers).

https://claude.ai/code/session_01WhrgpELQJJSakBnwSNuwGJ
---
 pyproject.toml | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 5f04908..9b19b53 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,6 +2,20 @@
 name = "chat-sdk"
 version = "0.4.26.2"
 description = "Multi-platform async chat SDK for Python — port of Vercel Chat"
+keywords = [
+    "chat",
+    "chatbot",
+    "chatops",
+    "slack-bot",
+    "discord-bot",
+    "telegram-bot",
+    "teams-bot",
+    "whatsapp-bot",
+    "bot-framework",
+    "async",
+    "asyncio",
+    "vercel",
+]
 readme = "README.md"
 license = {text = "MIT"}
 requires-python = ">=3.10"
@@ -16,7 +30,11 @@ classifiers = [
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
+    "Topic :: Communications",
     "Topic :: Communications :: Chat",
+    "Topic :: Internet",
+    "Topic :: Software Development :: Libraries :: Application Frameworks",
+    "Topic :: Software Development :: Libraries :: Python Modules",
     "Typing :: Typed",
 ]
 

From d1fbeacd30b67b723160e7730a41cd525a209c06 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 24 Apr 2026 04:50:40 +0000
Subject: [PATCH 03/10] refactor(teams): drop duplicate _escape_table_cell, use
 shared (#70)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

teams/format_converter.py had a byte-identical private copy of
shared/card_utils.escape_table_cell. Removed the copy, imported the
shared function, renamed the 2 call sites. Also added direct unit
tests for shared.card_utils.escape_table_cell + render_gfm_table —
both were previously covered only transitively through adapter tests.

First sub-PR of #70 (shared-helper consolidation). Scope check: most
of #70's five planned helpers already live in shared/card_utils.py —
only escape_table_cell (this PR) and button-style mapping (Teams +
Discord, remaining work) are still duplicated per-adapter.

https://claude.ai/code/session_01WhrgpELQJJSakBnwSNuwGJ
---
 .../adapters/teams/format_converter.py        | 10 ++--
 tests/test_cards.py                           | 49 +++++++++++++++++++
 2 files changed, 52 insertions(+), 7 deletions(-)

diff --git a/src/chat_sdk/adapters/teams/format_converter.py b/src/chat_sdk/adapters/teams/format_converter.py
index a96199b..23596ea 100644
--- a/src/chat_sdk/adapters/teams/format_converter.py
+++ b/src/chat_sdk/adapters/teams/format_converter.py
@@ -22,11 +22,7 @@
     get_node_value,
     parse_markdown,
 )
-
-
-def _escape_table_cell(value: str) -> str:
-    """Escape pipe characters in table cells for GFM rendering."""
-    return value.replace("\\", "\\\\").replace("|", "\\|").replace("\n", " ")
+from chat_sdk.shared.card_utils import escape_table_cell
 
 
 class TeamsFormatConverter(BaseFormatConverter):
@@ -194,11 +190,11 @@ def _table_to_gfm(self, node: Content) -> str:
 
         lines: list[str] = []
         # Header row
-        lines.append(f"| {' | '.join(_escape_table_cell(c) for c in rows[0])} |")
+        lines.append(f"| {' | '.join(escape_table_cell(c) for c in rows[0])} |")
         # Separator
         separators = ["---"] * len(rows[0])
         lines.append(f"| {' | '.join(separators)} |")
         # Data rows
         for row in rows[1:]:
-            lines.append(f"| {' | '.join(_escape_table_cell(c) for c in row)} |")
+            lines.append(f"| {' | '.join(escape_table_cell(c) for c in row)} |")
         return "\n".join(lines)
diff --git a/tests/test_cards.py b/tests/test_cards.py
index fe4d278..2ba7c82 100644
--- a/tests/test_cards.py
+++ b/tests/test_cards.py
@@ -8,6 +8,7 @@
     is_card_element,
     table_element_to_ascii,
 )
+from chat_sdk.shared.card_utils import escape_table_cell, render_gfm_table
 
 
 class TestIsCardElement:
@@ -159,3 +160,51 @@ def test_unknown_element(self):
     def test_button_element_returns_none(self):
         child = {"type": "button", "label": "Click me"}
         assert card_child_to_fallback_text(child) is None
+
+
+class TestEscapeTableCell:
+    """Tests for shared.card_utils.escape_table_cell."""
+
+    def test_plain_text_passthrough(self):
+        assert escape_table_cell("hello world") == "hello world"
+
+    def test_pipe_escaped(self):
+        assert escape_table_cell("a|b") == r"a\|b"
+
+    def test_backslash_doubled_before_pipe_escape(self):
+        # Backslash must be doubled FIRST so that a literal `\|` in input
+        # doesn't collide with the subsequent pipe-escape.
+        assert escape_table_cell(r"a\b") == r"a\\b"
+        assert escape_table_cell(r"a\|b") == r"a\\\|b"
+
+    def test_newline_collapsed_to_space(self):
+        assert escape_table_cell("line1\nline2") == "line1 line2"
+
+    def test_multiple_substitutions(self):
+        assert escape_table_cell("a|b\nc\\d") == r"a\|b c\\d"
+
+    def test_empty_string(self):
+        assert escape_table_cell("") == ""
+
+
+class TestRenderGfmTable:
+    """Tests for shared.card_utils.render_gfm_table."""
+
+    def test_basic_table(self):
+        lines = render_gfm_table(["h1", "h2"], [["a", "b"], ["c", "d"]])
+        assert lines == [
+            "| h1 | h2 |",
+            "| --- | --- |",
+            "| a | b |",
+            "| c | d |",
+        ]
+
+    def test_cells_are_escaped(self):
+        lines = render_gfm_table(["col"], [["pipe|inside"], ["has\nnewline"]])
+        assert r"pipe\|inside" in lines[2]
+        assert "has newline" in lines[3]
+
+    def test_empty_rows(self):
+        # No data rows — only header + separator.
+        lines = render_gfm_table(["only"], [])
+        assert lines == ["| only |", "| --- |"]

From 016586f78904bd43dc9d5379cf5358718caa3729 Mon Sep 17 00:00:00 2001
From: patrick-chinchill <patrick@chinchill.ai>
Date: Fri, 24 Apr 2026 03:27:47 -0700
Subject: [PATCH 04/10] ci(fidelity): fail script when mapped TS files are
 missing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously `verify_test_fidelity.py` printed "SKIPPED (file not found)"
for any mapped TS test whose source didn't exist under `TS_ROOT`, then
summed 0 matches + 0 missing and exited 0 with "All TS tests have
Python equivalents." Combined with `continue-on-error: true` on the
upstream-clone step, that made a silently-failing clone report
"fidelity check passed" in CI.

Now the script tracks missing-TS-file hits separately from real
successes. If any mapped TS file is absent at end-of-run, the script
prints a clear "upstream checkout missing — cannot verify fidelity"
message naming every missing path, includes the clone command hint,
and exits 1. This fires before strict/baseline/update-baseline
success branches so no mode can accidentally mask it.

Reproducer:
  TS_ROOT=/tmp/definitely-missing uv run python scripts/verify_test_fidelity.py
Before: exit 0.  After: exit 1 with infra-level error message.

Closes self-review gap #1 on #72.
---
 scripts/verify_test_fidelity.py | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/scripts/verify_test_fidelity.py b/scripts/verify_test_fidelity.py
index 32d16c6..6daf141 100644
--- a/scripts/verify_test_fidelity.py
+++ b/scripts/verify_test_fidelity.py
@@ -266,6 +266,7 @@ def main() -> int:
     all_missing: dict[str, list] = {}
     new_misses: dict[str, list[tuple[str, str]]] = {}
     fixed: dict[str, list[tuple[str, str]]] = {}
+    missing_ts_files: list[str] = []
 
     print("=" * 70)
     print("TEST FIDELITY REPORT")
@@ -280,7 +281,8 @@ def main() -> int:
     for ts_rel, py_rel in MAPPING.items():
         ts_path = os.path.join(TS_ROOT, ts_rel)
         if not os.path.exists(ts_path):
-            print(f"\n{ts_rel} — SKIPPED (file not found)")
+            print(f"\n{ts_rel} — MISSING (upstream TS file not found at {ts_path})")
+            missing_ts_files.append(ts_path)
             continue
 
         ts_tests = extract_ts_tests(ts_path)
@@ -344,6 +346,24 @@ def main() -> int:
     else:
         print(f"TOTAL: {total_matched}/{total_ts} matched ({pct}%), {total_missing} missing")
 
+    # Infra guard: if any mapped TS file is missing, we cannot verify fidelity.
+    # Do NOT treat this as success — a failed upstream clone would otherwise
+    # silently pass CI. Fail loudly before any downstream success branches.
+    if missing_ts_files:
+        print(
+            f"\nupstream checkout missing — cannot verify fidelity. "
+            f"{len(missing_ts_files)} mapped TS file(s) not found under TS_ROOT={TS_ROOT!r}:"
+        )
+        for path in missing_ts_files:
+            print(f"  - {path}")
+        print(
+            "\nClone the upstream repo at the pinned parity tag, e.g.:\n"
+            "  git clone --depth 1 --branch chat@4.26.0 "
+            "https://github.com/vercel/chat.git /tmp/vercel-chat\n"
+            "then re-run with TS_ROOT=/tmp/vercel-chat."
+        )
+        return 1
+
     if update_baseline:
         write_baseline(BASELINE_PATH, all_missing, total_ts)
         print(f"\nBaseline written to {BASELINE_PATH}")

From 4f7d0cfdbbde8a01062cd41551fa0e1a1d923ce7 Mon Sep 17 00:00:00 2001
From: patrick-chinchill <patrick@chinchill.ai>
Date: Fri, 24 Apr 2026 03:27:56 -0700
Subject: [PATCH 05/10] ci(fidelity): drop continue-on-error on clone, run
 --strict with empty baseline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two infra-level changes to the fidelity check:

1. `.github/workflows/lint.yml` — the `Clone upstream vercel/chat at
   pinned parity tag` step no longer carries `continue-on-error: true`.
   The clone is infrastructure the fidelity check depends on; if it
   fails, the job should fail there rather than swallow the failure
   and hope a later step catches it. Combined with the script-level
   guard from the previous commit, this is defense in depth.

2. CI now runs `scripts/verify_test_fidelity.py --strict`. Every
   `[post with Plan]` test that was baselined is now ported (PR #75
   and PR #74 in the 0.4.26.2 bundle), so the repo ships at 0 missing.
   `scripts/fidelity_baseline.json` is reduced to `{"missing": {}}`
   (metadata retained so `--update-baseline` and the documented
   workflow still function for future upstream syncs).

Closes self-review gap #2 on #72.
---
 .github/workflows/lint.yml     |  5 +--
 scripts/fidelity_baseline.json | 75 ++--------------------------------
 2 files changed, 6 insertions(+), 74 deletions(-)

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 2287b3f..ebd4f22 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -59,17 +59,16 @@ jobs:
 
       - name: Clone upstream vercel/chat at pinned parity tag
         id: clone_upstream
-        continue-on-error: true
         run: |
           git clone --depth 1 --branch chat@4.26.0 \
             https://github.com/vercel/chat.git /tmp/vercel-chat
 
-      - name: Test fidelity check (baseline-enforced)
+      - name: Test fidelity check (strict — zero missing)
         id: fidelity
         continue-on-error: true
         env:
           TS_ROOT: /tmp/vercel-chat
-        run: uv run python scripts/verify_test_fidelity.py
+        run: uv run python scripts/verify_test_fidelity.py --strict
 
       - name: Pyrefly type check
         id: pyrefly
diff --git a/scripts/fidelity_baseline.json b/scripts/fidelity_baseline.json
index a776e2b..216976f 100644
--- a/scripts/fidelity_baseline.json
+++ b/scripts/fidelity_baseline.json
@@ -1,74 +1,7 @@
 {
-  "_comment": "Ratchet-down baseline for scripts/verify_test_fidelity.py. Each entry is a [describe_block, ts_it_name] pair that is known to be missing a Python translation. Default CI mode accepts any subset of this list as missing and fails on new misses outside it. To remove entries: port the TS test to its Python counterpart, then regenerate this file with --update-baseline.",
+  "_comment": "Ratchet-down baseline for scripts/verify_test_fidelity.py. This repo ships at strict fidelity (0 missing) against chat@4.26.0, so the baseline is empty. Default CI mode now runs --strict via .github/workflows/lint.yml; this file is retained for local workflows that want to opt back into baseline mode (e.g. during an upstream sync where several ports land in flight). To baseline genuinely-divergent tests, run scripts/verify_test_fidelity.py --update-baseline after documenting the divergence in docs/UPSTREAM_SYNC.md.",
   "ts_parity": "chat@4.26.0",
-  "total_ts_tests": 564,
-  "total_missing": 16,
-  "missing": {
-    "packages/chat/src/thread.test.ts": [
-      [
-        "post with Plan",
-        "should add tasks and call editObject"
-      ],
-      [
-        "post with Plan",
-        "should call adapter postObject when supported"
-      ],
-      [
-        "post with Plan",
-        "should complete plan and mark tasks done"
-      ],
-      [
-        "post with Plan",
-        "should complete plan via editMessage in fallback mode"
-      ],
-      [
-        "post with Plan",
-        "should continue accepting edits after a failed edit"
-      ],
-      [
-        "post with Plan",
-        "should ensure sequential edits via queue"
-      ],
-      [
-        "post with Plan",
-        "should handle various PlanContent formats in initialMessage"
-      ],
-      [
-        "post with Plan",
-        "should post fallback text when adapter does not support plans"
-      ],
-      [
-        "post with Plan",
-        "should propagate editObject errors from addTask"
-      ],
-      [
-        "post with Plan",
-        "should reset plan and start fresh"
-      ],
-      [
-        "post with Plan",
-        "should return null when calling addTask before post"
-      ],
-      [
-        "post with Plan",
-        "should return null when calling complete before post"
-      ],
-      [
-        "post with Plan",
-        "should return null when calling updateTask before post"
-      ],
-      [
-        "post with Plan",
-        "should set error status via updateTask"
-      ],
-      [
-        "post with Plan",
-        "should update current task with output"
-      ],
-      [
-        "post with Plan",
-        "should update via editMessage in fallback mode"
-      ]
-    ]
-  }
+  "total_ts_tests": 588,
+  "total_missing": 0,
+  "missing": {}
 }

From 404bf3e3cbcc9f6ac8e6e06ba8d9b1fd8a032c4c Mon Sep 17 00:00:00 2001
From: patrick-chinchill <patrick@chinchill.ai>
Date: Fri, 24 Apr 2026 03:28:03 -0700
Subject: [PATCH 06/10] docs: reflect strict fidelity mode + add CHANGELOG
 entry for 0.4.26.2

- CLAUDE.md fidelity section: document that CI runs `--strict` and that
  the script now fails loudly on a missing upstream checkout. Baseline
  mode is retained but described as the local-dev fallback rather than
  the CI default.
- docs/UPSTREAM_SYNC.md: same update, plus clarify the two infra
  guardrails (no `continue-on-error` on the clone, script-level
  missing-TS-file check) and rewrite the workflow table for strict
  mode as the default.
- CHANGELOG.md: add a `### CI / Internals` subsection under the
  unreleased 0.4.26.2 entry describing the fidelity-check wiring,
  the two infra guardrails, and the empty baseline. Closes #53, #72.
---
 CHANGELOG.md          |  9 +++++++++
 CLAUDE.md             | 10 ++++++----
 docs/UPSTREAM_SYNC.md | 34 ++++++++++++++++++----------------
 3 files changed, 33 insertions(+), 20 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c664d14..07b45cd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -95,6 +95,15 @@ Parity catch-up with upstream `4.26.0`. No upstream version change.
   (`test_memory_state.py`, `test_state_postgres.py`). Closes the same
   flaky-test hazard fixed for the Redis backend in PR #73.
 
+### CI / Internals
+
+- `verify_test_fidelity.py` now enforces against upstream on every PR
+  (`.github/workflows/lint.yml`); fails when the upstream clone is missing
+  or when any mapped TS file can't be found. Workflow runs `--strict` and
+  the clone step no longer carries `continue-on-error: true`, so infra
+  failures surface immediately at the job level. Baseline shipped empty
+  (all previously-missing tests ported in this release). Closes #53, #72.
+
 ## 0.4.26.1 (2026-04-23)
 
 Python-only follow-up on `0.4.26`. Still alpha — APIs may change.
diff --git a/CLAUDE.md b/CLAUDE.md
index 6b7928e..26f772b 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -106,7 +106,9 @@ will not pass CI.
 
 **Fidelity check** (`scripts/verify_test_fidelity.py`) verifies every TS
 `it("...")` has a matching Python `def test_*()`, pinned to `chat@4.26.0`.
-Default mode is baseline-enforced: CI fails on any NEW miss not listed in
-`scripts/fidelity_baseline.json`. Run `--update-baseline` after porting a
-missing test (or documenting an intentional skip in `UPSTREAM_SYNC.md`). Use
-`--strict` to verify the final 0-missing target locally.
+**CI runs `--strict`** (see `.github/workflows/lint.yml`): any missing
+translation fails the build, and a missing upstream checkout also fails
+(the script exits non-zero when any mapped TS file isn't found). Baseline
+mode (the default without `--strict`) is retained for local workflows
+where a few ports land in flight — regenerate via `--update-baseline`
+after documenting intentional divergence in `docs/UPSTREAM_SYNC.md`.
diff --git a/docs/UPSTREAM_SYNC.md b/docs/UPSTREAM_SYNC.md
index d0400d1..a73366b 100644
--- a/docs/UPSTREAM_SYNC.md
+++ b/docs/UPSTREAM_SYNC.md
@@ -72,31 +72,33 @@ tests. If upstream tests lock in inconsistent behavior, choose one of:
 - **Preserve parity** and document the inconsistency in the non-parity section below
 - **Intentionally diverge** and document the divergence in the non-parity section
 
-### Test fidelity baseline
+### Test fidelity (strict mode)
 
 `scripts/verify_test_fidelity.py` runs in CI (`.github/workflows/lint.yml`) pinned
 to `vercel/chat@4.26.0` (matches the `UPSTREAM_PARITY` constant in
-`src/chat_sdk/__init__.py`). Default mode is **baseline-enforced**:
+`src/chat_sdk/__init__.py`). **CI runs `--strict`** — the repo ships at 0
+missing as of `0.4.26.2` and the baseline (`scripts/fidelity_baseline.json`)
+is empty.
 
-- The current set of missing TS-test translations lives in
-  `scripts/fidelity_baseline.json`.
-- CI fails if a TS test is missing that is **not** in the baseline (new drift).
-- CI succeeds if all currently-missing tests are a subset of the baseline — even
-  if nothing has been ported yet.
-- Fixed tests (in baseline but now ported) are reported with a reminder to
-  tighten the baseline.
+Infra guardrails:
+
+- The workflow's `Clone upstream vercel/chat at pinned parity tag` step does
+  **not** use `continue-on-error` — a failed clone aborts the job loudly.
+- The script itself fails with exit 1 if any mapped TS file is missing under
+  `TS_ROOT` (defense in depth against silent skips).
 
 Workflows:
 
 | Goal | Command |
 |------|---------|
-| Port a missing test | Write the Python test, then `--update-baseline` to remove it from the ratchet |
-| Add a Python-only divergence (intentional skip) | Document in [Known Non-Parity](#known-non-parity-with-typescript-sdk), then `--update-baseline` |
-| Upstream sync | After pulling new upstream, run default mode — any newly-added TS tests appear as NEW misses and CI fails until ported or baselined |
-| Final parity check | `--strict` ignores the baseline and fails on any missing — target once baseline hits zero |
-
-The baseline file is ordered and stable so diffs are easy to review. Regenerate
-it whenever the missing set changes — don't hand-edit.
+| Port a missing test | Write the Python test and land it; CI rejects anything that re-introduces a gap |
+| Add a Python-only divergence (intentional skip) | Document in [Known Non-Parity](#known-non-parity-with-typescript-sdk), then `--update-baseline` and switch the workflow back to non-strict default for that file if truly unavoidable |
+| Upstream sync | After pulling new upstream, run `--strict` — newly-added TS tests appear as missing and CI fails until ported |
+| Final parity check | Same as CI: `TS_ROOT=/tmp/vercel-chat uv run python scripts/verify_test_fidelity.py --strict` |
+
+Baseline mode (the default without `--strict`) is retained for local
+development where a few ports land in flight. Regenerate the baseline via
+`--update-baseline` rather than hand-editing.
 
 ## Divergence Policy
 

From 9400d3178dce55b9b97dbd4d65feba7720b48d2e Mon Sep 17 00:00:00 2001
From: patrick-chinchill <patrick@chinchill.ai>
Date: Fri, 24 Apr 2026 04:13:12 -0700
Subject: [PATCH 07/10] fix(fidelity): address coderabbit review on PR #72

- CLAUDE.md: add local clone command to the quick reference so the
  "fails if missing" rule is actionable
- verify_test_fidelity.py docstring/baseline-comment: reflect strict-mode
  CI contract (was still describing baseline mode)
- load_baseline: validate ts_parity against UPSTREAM_PARITY so a stale
  baseline can't silently mask upstream drift
- CLI flags: reject --strict + --update-baseline together (exit 2)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CLAUDE.md                       |  8 ++++
 scripts/verify_test_fidelity.py | 76 +++++++++++++++++++++++++++------
 2 files changed, 71 insertions(+), 13 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 26f772b..97e0ea0 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -112,3 +112,11 @@ translation fails the build, and a missing upstream checkout also fails
 mode (the default without `--strict`) is retained for local workflows
 where a few ports land in flight — regenerate via `--update-baseline`
 after documenting intentional divergence in `docs/UPSTREAM_SYNC.md`.
+
+Before the fidelity check can run locally, clone the pinned upstream
+checkout (same command CI uses in `lint.yml`):
+```bash
+git clone --depth 1 --branch chat@4.26.0 \
+  https://github.com/vercel/chat.git /tmp/vercel-chat
+```
+Then `TS_ROOT=/tmp/vercel-chat uv run python scripts/verify_test_fidelity.py --strict`.
diff --git a/scripts/verify_test_fidelity.py b/scripts/verify_test_fidelity.py
index 6daf141..38db3f7 100644
--- a/scripts/verify_test_fidelity.py
+++ b/scripts/verify_test_fidelity.py
@@ -6,17 +6,22 @@
 Python translation.
 
 Usage:
-    python scripts/verify_test_fidelity.py             # baseline mode (default)
-    python scripts/verify_test_fidelity.py --strict    # fail on any missing
+    python scripts/verify_test_fidelity.py --strict    # CI path: fail on any missing
+    python scripts/verify_test_fidelity.py             # baseline mode (local opt-in)
     python scripts/verify_test_fidelity.py --fix       # append stubs for missing
     python scripts/verify_test_fidelity.py --update-baseline  # rewrite baseline
 
-Default (baseline) mode: succeeds iff the set of missing tests is a subset of
-``scripts/fidelity_baseline.json``. Tests that are in the baseline but now pass
-are reported as fixed. New misses outside the baseline fail CI.
-
-``--strict`` ignores the baseline and fails on any missing. This is the
-eventual target once the baseline count ratchets to zero.
+``--strict`` is the current CI contract (see ``.github/workflows/lint.yml``):
+the baseline is ignored and any missing translation — or a missing upstream
+checkout — fails the build. This repo ships at zero missing against
+``chat@4.26.0``.
+
+Baseline mode (the default without ``--strict``) is retained for local
+workflows where a few ports land in flight: it succeeds iff the set of
+missing tests is a subset of ``scripts/fidelity_baseline.json``. Tests that
+are in the baseline but now pass are reported as fixed; new misses outside
+the baseline fail. Regenerate via ``--update-baseline`` after documenting
+intentional divergence in ``docs/UPSTREAM_SYNC.md``.
 """
 
 import json
@@ -215,12 +220,46 @@ def count_absorbers(py_path: str) -> int:
     return count
 
 
+def _current_parity_tag() -> str | None:
+    """Return the baseline-format parity tag (``chat@X.Y.Z``) for the current repo.
+
+    Reads ``UPSTREAM_PARITY`` from ``src/chat_sdk/__init__.py`` without
+    importing the package (avoids pulling optional runtime deps during a
+    script run). Returns None if the constant can't be located.
+    """
+    init_path = Path(__file__).parent.parent / "src" / "chat_sdk" / "__init__.py"
+    if not init_path.exists():
+        return None
+    with open(init_path) as f:
+        content = f.read()
+    m = re.search(r'^UPSTREAM_PARITY\s*=\s*"([^"]+)"', content, re.MULTILINE)
+    if not m:
+        return None
+    return f"chat@{m.group(1)}"
+
+
 def load_baseline(path: Path) -> dict[str, set[tuple[str, str]]]:
-    """Load fidelity baseline. Missing file returns empty baseline."""
+    """Load fidelity baseline. Missing file returns empty baseline.
+
+    Exits with code 1 when the baseline's ``ts_parity`` disagrees with the
+    current ``UPSTREAM_PARITY`` constant — a stale baseline could otherwise
+    silently mask upstream drift after a version bump.
+    """
     if not path.exists():
         return {}
     with open(path) as f:
         data = json.load(f)
+    baseline_parity = data.get("ts_parity")
+    current_parity = _current_parity_tag()
+    if baseline_parity and current_parity and baseline_parity != current_parity:
+        print(
+            f"\nbaseline parity mismatch: {path.name} was generated for "
+            f"upstream {baseline_parity}, but current parity is "
+            f"{current_parity} — re-run with `--update-baseline` after "
+            f"confirming the diff.",
+            file=sys.stderr,
+        )
+        sys.exit(1)
     out: dict[str, set[tuple[str, str]]] = {}
     for ts_rel, entries in data.get("missing", {}).items():
         out[ts_rel] = {(e[0], e[1]) for e in entries}
@@ -233,10 +272,12 @@ def write_baseline(path: Path, all_missing: dict[str, list], total_ts: int) -> N
         "_comment": (
             "Ratchet-down baseline for scripts/verify_test_fidelity.py. "
             "Each entry is a [describe_block, ts_it_name] pair that is known "
-            "to be missing a Python translation. Default CI mode accepts any "
-            "subset of this list as missing and fails on new misses outside "
-            "it. To remove entries: port the TS test to its Python counterpart, "
-            "then regenerate this file with --update-baseline."
+            "to be missing a Python translation. CI runs --strict (see "
+            ".github/workflows/lint.yml) and ignores this file; baseline "
+            "mode is a local-dev opt-in that accepts any subset of this "
+            "list as missing and fails on new misses outside it. To remove "
+            "entries: port the TS test to its Python counterpart, then "
+            "regenerate this file with --update-baseline."
         ),
         "ts_parity": "chat@4.26.0",
         "total_ts_tests": total_ts,
@@ -257,6 +298,15 @@ def main() -> int:
     strict_mode = "--strict" in sys.argv
     update_baseline = "--update-baseline" in sys.argv
 
+    if strict_mode and update_baseline:
+        print(
+            "error: --strict and --update-baseline are mutually exclusive.\n"
+            "  --strict says 'no missing allowed'; --update-baseline says "
+            "'snapshot whatever is missing into the allowlist'. Pick one.",
+            file=sys.stderr,
+        )
+        return 2
+
     baseline = {} if (strict_mode or update_baseline) else load_baseline(BASELINE_PATH)
 
     total_missing = 0

From 7b27e2d4ee5fb9c0714c8350b1296b5eadcf8e5b Mon Sep 17 00:00:00 2001
From: patrick-chinchill <patrick@chinchill.ai>
Date: Fri, 24 Apr 2026 10:41:40 -0700
Subject: [PATCH 08/10] fix(fidelity): preserve baseline _comment + qualify
 scope-of-coverage claim

Addresses 2 medium findings from local adversarial review on #72:

- write_baseline: preserve existing _comment rather than overwriting
  with boilerplate on each --update-baseline run.
- CHANGELOG / CLAUDE.md / fidelity_baseline.json: "strict fidelity"
  now qualified as "strict fidelity for mapped core files" since
  MAPPING covers 8 of 17 packages/chat/src/*.test.ts files.

Follow-ups filed for (a) MAPPING expansion to full core-package scope,
(b) pinning upstream clone SHA in lint.yml, (c) tightening fuzzy_match
against hyphen-stripping.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                    |  5 +++-
 CLAUDE.md                       | 18 +++++++++------
 docs/UPSTREAM_SYNC.md           |  8 +++++--
 scripts/fidelity_baseline.json  |  2 +-
 scripts/verify_test_fidelity.py | 41 ++++++++++++++++++++++++---------
 5 files changed, 52 insertions(+), 22 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 07b45cd..ef13737 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -102,7 +102,10 @@ Parity catch-up with upstream `4.26.0`. No upstream version change.
   or when any mapped TS file can't be found. Workflow runs `--strict` and
   the clone step no longer carries `continue-on-error: true`, so infra
   failures surface immediately at the job level. Baseline shipped empty
-  (all previously-missing tests ported in this release). Closes #53, #72.
+  (all previously-missing tests ported in this release) — strict fidelity
+  for *mapped core files* (8 of 17 `packages/chat/src/*.test.ts` files;
+  see the `MAPPING` dict in `scripts/verify_test_fidelity.py` for the
+  authoritative scope list). Closes #53, #72.
 
 ## 0.4.26.1 (2026-04-23)
 
diff --git a/CLAUDE.md b/CLAUDE.md
index 97e0ea0..0b9772c 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -105,13 +105,17 @@ async mock bugs, and cross-file duplicates. PRs that introduce hard failures
 will not pass CI.
 
 **Fidelity check** (`scripts/verify_test_fidelity.py`) verifies every TS
-`it("...")` has a matching Python `def test_*()`, pinned to `chat@4.26.0`.
-**CI runs `--strict`** (see `.github/workflows/lint.yml`): any missing
-translation fails the build, and a missing upstream checkout also fails
-(the script exits non-zero when any mapped TS file isn't found). Baseline
-mode (the default without `--strict`) is retained for local workflows
-where a few ports land in flight — regenerate via `--update-baseline`
-after documenting intentional divergence in `docs/UPSTREAM_SYNC.md`.
+`it("...")` in the mapped core files has a matching Python `def test_*()`,
+pinned to `chat@4.26.0`. The `MAPPING` dict in that script is the
+authoritative scope list — it currently covers 8 of 17
+`packages/chat/src/*.test.ts` files (extending it is tracked as a
+follow-up). **CI runs `--strict`** (see `.github/workflows/lint.yml`):
+any missing translation in a mapped file fails the build, and a missing
+upstream checkout also fails (the script exits non-zero when any mapped
+TS file isn't found). Baseline mode (the default without `--strict`) is
+retained for local workflows where a few ports land in flight —
+regenerate via `--update-baseline` after documenting intentional
+divergence in `docs/UPSTREAM_SYNC.md`.
 
 Before the fidelity check can run locally, clone the pinned upstream
 checkout (same command CI uses in `lint.yml`):
diff --git a/docs/UPSTREAM_SYNC.md b/docs/UPSTREAM_SYNC.md
index a73366b..a905793 100644
--- a/docs/UPSTREAM_SYNC.md
+++ b/docs/UPSTREAM_SYNC.md
@@ -77,8 +77,12 @@ tests. If upstream tests lock in inconsistent behavior, choose one of:
 `scripts/verify_test_fidelity.py` runs in CI (`.github/workflows/lint.yml`) pinned
 to `vercel/chat@4.26.0` (matches the `UPSTREAM_PARITY` constant in
 `src/chat_sdk/__init__.py`). **CI runs `--strict`** — the repo ships at 0
-missing as of `0.4.26.2` and the baseline (`scripts/fidelity_baseline.json`)
-is empty.
+missing *for mapped core files* as of `0.4.26.2` and the baseline
+(`scripts/fidelity_baseline.json`) is empty. Scope is defined by the
+`MAPPING` dict in the script: 8 of 17 `packages/chat/src/*.test.ts` files
+today (extending to the remaining 9 is tracked as a follow-up). Unmapped
+files are not checked — tightening scope requires editing `MAPPING` and
+re-running `--strict`.
 
 Infra guardrails:
 
diff --git a/scripts/fidelity_baseline.json b/scripts/fidelity_baseline.json
index 216976f..d9cfc42 100644
--- a/scripts/fidelity_baseline.json
+++ b/scripts/fidelity_baseline.json
@@ -1,5 +1,5 @@
 {
-  "_comment": "Ratchet-down baseline for scripts/verify_test_fidelity.py. This repo ships at strict fidelity (0 missing) against chat@4.26.0, so the baseline is empty. Default CI mode now runs --strict via .github/workflows/lint.yml; this file is retained for local workflows that want to opt back into baseline mode (e.g. during an upstream sync where several ports land in flight). To baseline genuinely-divergent tests, run scripts/verify_test_fidelity.py --update-baseline after documenting the divergence in docs/UPSTREAM_SYNC.md.",
+  "_comment": "Ratchet-down baseline for scripts/verify_test_fidelity.py. This repo ships at strict fidelity for mapped core files (0 missing) against chat@4.26.0, so the baseline is empty. Scope: the MAPPING dict in scripts/verify_test_fidelity.py is the authoritative list of TS files checked; it currently covers 8 of the 17 packages/chat/src/*.test.ts files. Default CI mode runs --strict via .github/workflows/lint.yml; this file is retained for local workflows that want to opt back into baseline mode (e.g. during an upstream sync where several ports land in flight). To baseline genuinely-divergent tests, run scripts/verify_test_fidelity.py --update-baseline after documenting the divergence in docs/UPSTREAM_SYNC.md.",
   "ts_parity": "chat@4.26.0",
   "total_ts_tests": 588,
   "total_missing": 0,
diff --git a/scripts/verify_test_fidelity.py b/scripts/verify_test_fidelity.py
index 38db3f7..5227c81 100644
--- a/scripts/verify_test_fidelity.py
+++ b/scripts/verify_test_fidelity.py
@@ -266,19 +266,38 @@ def load_baseline(path: Path) -> dict[str, set[tuple[str, str]]]:
     return out
 
 
+_DEFAULT_BASELINE_COMMENT = (
+    "Ratchet-down baseline for scripts/verify_test_fidelity.py. "
+    "Each entry is a [describe_block, ts_it_name] pair that is known "
+    "to be missing a Python translation. CI runs --strict (see "
+    ".github/workflows/lint.yml) and ignores this file; baseline "
+    "mode is a local-dev opt-in that accepts any subset of this "
+    "list as missing and fails on new misses outside it. To remove "
+    "entries: port the TS test to its Python counterpart, then "
+    "regenerate this file with --update-baseline."
+)
+
+
 def write_baseline(path: Path, all_missing: dict[str, list], total_ts: int) -> None:
-    """Persist the current set of missing tests as the new baseline."""
+    """Persist the current set of missing tests as the new baseline.
+
+    If ``path`` already exists and has a ``_comment`` field, that curated
+    comment is preserved so hand-written context (e.g. scope qualifiers,
+    shipping-posture notes) isn't silently overwritten on every
+    ``--update-baseline`` run. Only fresh files get the default boilerplate.
+    """
+    existing_comment: str | None = None
+    if path.exists():
+        try:
+            with open(path) as f:
+                existing = json.load(f)
+            if isinstance(existing.get("_comment"), str):
+                existing_comment = existing["_comment"]
+        except (OSError, json.JSONDecodeError):
+            existing_comment = None
+
     payload = {
-        "_comment": (
-            "Ratchet-down baseline for scripts/verify_test_fidelity.py. "
-            "Each entry is a [describe_block, ts_it_name] pair that is known "
-            "to be missing a Python translation. CI runs --strict (see "
-            ".github/workflows/lint.yml) and ignores this file; baseline "
-            "mode is a local-dev opt-in that accepts any subset of this "
-            "list as missing and fails on new misses outside it. To remove "
-            "entries: port the TS test to its Python counterpart, then "
-            "regenerate this file with --update-baseline."
-        ),
+        "_comment": existing_comment if existing_comment is not None else _DEFAULT_BASELINE_COMMENT,
         "ts_parity": "chat@4.26.0",
         "total_ts_tests": total_ts,
         "total_missing": sum(len(v) for v in all_missing.values()),

From 638a75928915d012daf4298aed0196cf2d1c300f Mon Sep 17 00:00:00 2001
From: patrick-chinchill <patrick@chinchill.ai>
Date: Fri, 24 Apr 2026 10:49:21 -0700
Subject: [PATCH 09/10] docs(fidelity): finish scope-qualification sweep on PR
 #72
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Round-2 local review caught 3 spots the round-1 sweep missed:
- script module docstring still said "zero missing against chat@4.26.0"
  unqualified (renders in --help and source)
- lint.yml step title "Test fidelity check (strict — zero missing)"
  renders unqualified in the Actions UI
- CHANGELOG entry had "Closes #53, #72"; a PR can't close itself

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/lint.yml      | 2 +-
 CHANGELOG.md                    | 2 +-
 scripts/verify_test_fidelity.py | 7 +++++--
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index ebd4f22..f490d45 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -63,7 +63,7 @@ jobs:
           git clone --depth 1 --branch chat@4.26.0 \
             https://github.com/vercel/chat.git /tmp/vercel-chat
 
-      - name: Test fidelity check (strict — zero missing)
+      - name: Test fidelity check (strict — zero missing in mapped core files)
         id: fidelity
         continue-on-error: true
         env:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ef13737..8fd71da 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -105,7 +105,7 @@ Parity catch-up with upstream `4.26.0`. No upstream version change.
   (all previously-missing tests ported in this release) — strict fidelity
   for *mapped core files* (8 of 17 `packages/chat/src/*.test.ts` files;
   see the `MAPPING` dict in `scripts/verify_test_fidelity.py` for the
-  authoritative scope list). Closes #53, #72.
+  authoritative scope list). Closes #53.
 
 ## 0.4.26.1 (2026-04-23)
 
diff --git a/scripts/verify_test_fidelity.py b/scripts/verify_test_fidelity.py
index 5227c81..b41f429 100644
--- a/scripts/verify_test_fidelity.py
+++ b/scripts/verify_test_fidelity.py
@@ -13,8 +13,11 @@
 
 ``--strict`` is the current CI contract (see ``.github/workflows/lint.yml``):
 the baseline is ignored and any missing translation — or a missing upstream
-checkout — fails the build. This repo ships at zero missing against
-``chat@4.26.0``.
+checkout — fails the build. This repo ships at strict fidelity for mapped
+core files (0 missing) against ``chat@4.26.0``. The ``MAPPING`` dict below
+is the authoritative scope list; it currently covers 8 of the 17
+``packages/chat/src/*.test.ts`` files (extending it is tracked as a
+follow-up).
 
 Baseline mode (the default without ``--strict``) is retained for local
 workflows where a few ports land in flight: it succeeds iff the set of

From 9b3648c8ce5653b21ee27341d56435d23ff45a30 Mon Sep 17 00:00:00 2001
From: patrick-chinchill <patrick@chinchill.ai>
Date: Fri, 24 Apr 2026 10:56:07 -0700
Subject: [PATCH 10/10] fix(fidelity): use UPSTREAM_PARITY for write_baseline +
 scope-qualify default comment

Round-3 local review caught two gaps:

- write_baseline hardcoded ts_parity="chat@4.26.0". This would self-trap on
  the next upstream sync: the regenerated baseline would still carry the
  stale tag, so the next run would fail with the same "re-run with
  --update-baseline" advice the user had just followed. It now reads
  _current_parity_tag().
- _DEFAULT_BASELINE_COMMENT didn't mention scope framing. Every other
  user-facing surface carries the "mapped core files / 8 of 17 / MAPPING"
  language; rm-ing the baseline and regenerating silently dropped it.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 scripts/verify_test_fidelity.py | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/scripts/verify_test_fidelity.py b/scripts/verify_test_fidelity.py
index b41f429..63f8acc 100644
--- a/scripts/verify_test_fidelity.py
+++ b/scripts/verify_test_fidelity.py
@@ -270,14 +270,19 @@ def load_baseline(path: Path) -> dict[str, set[tuple[str, str]]]:
 
 
 _DEFAULT_BASELINE_COMMENT = (
-    "Ratchet-down baseline for scripts/verify_test_fidelity.py. "
-    "Each entry is a [describe_block, ts_it_name] pair that is known "
-    "to be missing a Python translation. CI runs --strict (see "
-    ".github/workflows/lint.yml) and ignores this file; baseline "
-    "mode is a local-dev opt-in that accepts any subset of this "
-    "list as missing and fails on new misses outside it. To remove "
-    "entries: port the TS test to its Python counterpart, then "
-    "regenerate this file with --update-baseline."
+    "Ratchet-down baseline for scripts/verify_test_fidelity.py. This "
+    "repo ships at strict fidelity for mapped core files (0 missing) "
+    "against the current UPSTREAM_PARITY tag, so the baseline is "
+    "normally empty. Scope: the MAPPING dict in "
+    "scripts/verify_test_fidelity.py is the authoritative list of TS "
+    "files checked; it currently covers 8 of the 17 "
+    "packages/chat/src/*.test.ts files. Default CI mode runs --strict "
+    "via .github/workflows/lint.yml; this file is retained for local "
+    "workflows that want to opt back into baseline mode (e.g. during "
+    "an upstream sync where several ports land in flight). To "
+    "baseline genuinely-divergent tests, run "
+    "scripts/verify_test_fidelity.py --update-baseline after "
+    "documenting the divergence in docs/UPSTREAM_SYNC.md."
 )
 
 
@@ -299,9 +304,14 @@ def write_baseline(path: Path, all_missing: dict[str, list], total_ts: int) -> N
         except (OSError, json.JSONDecodeError):
             existing_comment = None
 
+    # Derive ts_parity from UPSTREAM_PARITY so a fresh regen after an
+    # upstream version bump doesn't self-trap on a stale literal. Fall
+    # back to the last-known literal only if UPSTREAM_PARITY can't be
+    # read (e.g. __init__.py missing during an in-flight refactor).
+    current_parity = _current_parity_tag()
     payload = {
         "_comment": existing_comment if existing_comment is not None else _DEFAULT_BASELINE_COMMENT,
-        "ts_parity": "chat@4.26.0",
+        "ts_parity": current_parity if current_parity is not None else "chat@4.26.0",
         "total_ts_tests": total_ts,
         "total_missing": sum(len(v) for v in all_missing.values()),
         "missing": {