From 95b95939556b7c0946b128b06ee6b35e1392d54d Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 24 Apr 2026 02:37:56 +0000 Subject: [PATCH 01/10] ci(fidelity): ratchet-down baseline + wire into lint.yml (closes #53) Verify-test-fidelity ran out-of-band with no CI invocation, so missing upstream-TS-test translations drifted undetected (16 gaps at 4.26.0, 40 against upstream main). Now: - Default mode is baseline-enforced via `scripts/fidelity_baseline.json`. CI fails on new misses outside the baseline; accepts baselined misses as-is. Fixed misses are reported with a nudge to tighten the baseline. - `--strict` ignores the baseline (eventual target once baseline hits 0). - `--update-baseline` regenerates the baseline file. - Initial baseline captures the 16 `[post with Plan]` gaps tracked in #55, pinned to `chat@4.26.0`. - lint.yml clones `vercel/chat@4.26.0` and runs the check; aggregates into the existing "fail if any step failed" gate. CLAUDE.md + docs/UPSTREAM_SYNC.md document the workflow. https://claude.ai/code/session_01WhrgpELQJJSakBnwSNuwGJ --- .github/workflows/lint.yml | 18 ++++- CLAUDE.md | 7 +- docs/UPSTREAM_SYNC.md | 26 +++++++ scripts/fidelity_baseline.json | 74 +++++++++++++++++++ scripts/verify_test_fidelity.py | 122 ++++++++++++++++++++++++++++++-- 5 files changed, 238 insertions(+), 9 deletions(-) create mode 100644 scripts/fidelity_baseline.json diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index d6f0771..2287b3f 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -57,6 +57,20 @@ jobs: continue-on-error: true run: uv run python scripts/audit_test_quality.py + - name: Clone upstream vercel/chat at pinned parity tag + id: clone_upstream + continue-on-error: true + run: | + git clone --depth 1 --branch chat@4.26.0 \ + https://github.com/vercel/chat.git /tmp/vercel-chat + + - name: Test fidelity check (baseline-enforced) + id: fidelity + continue-on-error: true + env: + TS_ROOT: /tmp/vercel-chat + run: uv run python scripts/verify_test_fidelity.py + - name: Pyrefly type check id: pyrefly continue-on-error: true @@ -75,6 +89,7 @@ jobs: echo "| Ruff check | ${{ steps.ruff_check.outcome }} |" >> $GITHUB_STEP_SUMMARY echo "| Ruff format | ${{ steps.ruff_format.outcome }} |" >> $GITHUB_STEP_SUMMARY echo "| Test audit | ${{ steps.audit.outcome }} |" >> $GITHUB_STEP_SUMMARY + echo "| Test fidelity | ${{ steps.fidelity.outcome }} |" >> $GITHUB_STEP_SUMMARY echo "| Pyrefly | ${{ steps.pyrefly.outcome }} |" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY if [ "${{ steps.pyrefly.outcome }}" = "success" ]; then @@ -89,10 +104,11 @@ jobs: RUFF_CHECK: ${{ steps.ruff_check.outcome }} RUFF_FORMAT: ${{ steps.ruff_format.outcome }} AUDIT: ${{ steps.audit.outcome }} + FIDELITY: ${{ steps.fidelity.outcome }} PYREFLY: ${{ steps.pyrefly.outcome }} run: | failures=0 - for var in RUFF_CHECK RUFF_FORMAT AUDIT PYREFLY; do + for var in RUFF_CHECK RUFF_FORMAT AUDIT FIDELITY PYREFLY; do outcome="${!var}" if [ "$outcome" != "success" ]; then echo "$var failed (outcome: $outcome)" diff --git a/CLAUDE.md b/CLAUDE.md index f838b86..6b7928e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -105,5 +105,8 @@ async mock bugs, and cross-file duplicates. PRs that introduce hard failures will not pass CI. **Fidelity check** (`scripts/verify_test_fidelity.py`) verifies every TS -`it("...")` has a matching Python `def test_*()`. Must show 0 missing before -committing test changes. +`it("...")` has a matching Python `def test_*()`, pinned to `chat@4.26.0`. +Default mode is baseline-enforced: CI fails on any NEW miss not listed in +`scripts/fidelity_baseline.json`. Run `--update-baseline` after porting a +missing test (or documenting an intentional skip in `UPSTREAM_SYNC.md`). Use +`--strict` to verify the final 0-missing target locally. diff --git a/docs/UPSTREAM_SYNC.md b/docs/UPSTREAM_SYNC.md index 1d60574..d0400d1 100644 --- a/docs/UPSTREAM_SYNC.md +++ b/docs/UPSTREAM_SYNC.md @@ -72,6 +72,32 @@ tests. If upstream tests lock in inconsistent behavior, choose one of: - **Preserve parity** and document the inconsistency in the non-parity section below - **Intentionally diverge** and document the divergence in the non-parity section +### Test fidelity baseline + +`scripts/verify_test_fidelity.py` runs in CI (`.github/workflows/lint.yml`) pinned +to `vercel/chat@4.26.0` (matches the `UPSTREAM_PARITY` constant in +`src/chat_sdk/__init__.py`). Default mode is **baseline-enforced**: + +- The current set of missing TS-test translations lives in + `scripts/fidelity_baseline.json`. +- CI fails if a TS test is missing that is **not** in the baseline (new drift). +- CI succeeds if all currently-missing tests are a subset of the baseline — even + if nothing has been ported yet. +- Fixed tests (in baseline but now ported) are reported with a reminder to + tighten the baseline. + +Workflows: + +| Goal | Command | +|------|---------| +| Port a missing test | Write the Python test, then `--update-baseline` to remove it from the ratchet | +| Add a Python-only divergence (intentional skip) | Document in [Known Non-Parity](#known-non-parity-with-typescript-sdk), then `--update-baseline` | +| Upstream sync | After pulling new upstream, run default mode — any newly-added TS tests appear as NEW misses and CI fails until ported or baselined | +| Final parity check | `--strict` ignores the baseline and fails on any missing — target once baseline hits zero | + +The baseline file is ordered and stable so diffs are easy to review. Regenerate +it whenever the missing set changes — don't hand-edit. + ## Divergence Policy Every divergence from upstream has a cost: merge conflicts on future syncs, diff --git a/scripts/fidelity_baseline.json b/scripts/fidelity_baseline.json new file mode 100644 index 0000000..a776e2b --- /dev/null +++ b/scripts/fidelity_baseline.json @@ -0,0 +1,74 @@ +{ + "_comment": "Ratchet-down baseline for scripts/verify_test_fidelity.py. Each entry is a [describe_block, ts_it_name] pair that is known to be missing a Python translation. Default CI mode accepts any subset of this list as missing and fails on new misses outside it. To remove entries: port the TS test to its Python counterpart, then regenerate this file with --update-baseline.", + "ts_parity": "chat@4.26.0", + "total_ts_tests": 564, + "total_missing": 16, + "missing": { + "packages/chat/src/thread.test.ts": [ + [ + "post with Plan", + "should add tasks and call editObject" + ], + [ + "post with Plan", + "should call adapter postObject when supported" + ], + [ + "post with Plan", + "should complete plan and mark tasks done" + ], + [ + "post with Plan", + "should complete plan via editMessage in fallback mode" + ], + [ + "post with Plan", + "should continue accepting edits after a failed edit" + ], + [ + "post with Plan", + "should ensure sequential edits via queue" + ], + [ + "post with Plan", + "should handle various PlanContent formats in initialMessage" + ], + [ + "post with Plan", + "should post fallback text when adapter does not support plans" + ], + [ + "post with Plan", + "should propagate editObject errors from addTask" + ], + [ + "post with Plan", + "should reset plan and start fresh" + ], + [ + "post with Plan", + "should return null when calling addTask before post" + ], + [ + "post with Plan", + "should return null when calling complete before post" + ], + [ + "post with Plan", + "should return null when calling updateTask before post" + ], + [ + "post with Plan", + "should set error status via updateTask" + ], + [ + "post with Plan", + "should update current task with output" + ], + [ + "post with Plan", + "should update via editMessage in fallback mode" + ] + ] + } +} diff --git a/scripts/verify_test_fidelity.py b/scripts/verify_test_fidelity.py index 6bb81b7..32d16c6 100644 --- a/scripts/verify_test_fidelity.py +++ b/scripts/verify_test_fidelity.py @@ -6,11 +6,20 @@ Python translation. Usage: - python scripts/verify_test_fidelity.py [--fix] + python scripts/verify_test_fidelity.py # baseline mode (default) + python scripts/verify_test_fidelity.py --strict # fail on any missing + python scripts/verify_test_fidelity.py --fix # append stubs for missing + python scripts/verify_test_fidelity.py --update-baseline # rewrite baseline -With --fix: appends stub test functions for any missing translations. +Default (baseline) mode: succeeds iff the set of missing tests is a subset of +``scripts/fidelity_baseline.json``. Tests that are in the baseline but now pass +are reported as fixed. New misses outside the baseline fail CI. + +``--strict`` ignores the baseline and fails on any missing. This is the +eventual target once the baseline count ratchets to zero. """ +import json import os import re import sys @@ -18,6 +27,7 @@ TS_ROOT = os.environ.get("TS_ROOT", "/tmp/vercel-chat") PY_ROOT = os.environ.get("PY_ROOT", str(Path(__file__).parent.parent)) +BASELINE_PATH = Path(__file__).parent / "fidelity_baseline.json" # Mapping: TS test file -> Python test file MAPPING = { @@ -205,15 +215,66 @@ def count_absorbers(py_path: str) -> int: return count +def load_baseline(path: Path) -> dict[str, set[tuple[str, str]]]: + """Load fidelity baseline. Missing file returns empty baseline.""" + if not path.exists(): + return {} + with open(path) as f: + data = json.load(f) + out: dict[str, set[tuple[str, str]]] = {} + for ts_rel, entries in data.get("missing", {}).items(): + out[ts_rel] = {(e[0], e[1]) for e in entries} + return out + + +def write_baseline(path: Path, all_missing: dict[str, list], total_ts: int) -> None: + """Persist the current set of missing tests as the new baseline.""" + payload = { + "_comment": ( + "Ratchet-down baseline for scripts/verify_test_fidelity.py. " + "Each entry is a [describe_block, ts_it_name] pair that is known " + "to be missing a Python translation. Default CI mode accepts any " + "subset of this list as missing and fails on new misses outside " + "it. To remove entries: port the TS test to its Python counterpart, " + "then regenerate this file with --update-baseline." + ), + "ts_parity": "chat@4.26.0", + "total_ts_tests": total_ts, + "total_missing": sum(len(v) for v in all_missing.values()), + "missing": { + ts_rel: [[d, t] for d, t, _p in sorted(entries, key=lambda e: (e[0], e[1]))] + for ts_rel, entries in sorted(all_missing.items()) + if entries + }, + } + with open(path, "w") as f: + json.dump(payload, f, indent=2, sort_keys=False) + f.write("\n") + + def main() -> int: fix_mode = "--fix" in sys.argv + strict_mode = "--strict" in sys.argv + update_baseline = "--update-baseline" in sys.argv + + baseline = {} if (strict_mode or update_baseline) else load_baseline(BASELINE_PATH) + total_missing = 0 total_matched = 0 total_ts = 0 total_absorbers = 0 + all_missing: dict[str, list] = {} + new_misses: dict[str, list[tuple[str, str]]] = {} + fixed: dict[str, list[tuple[str, str]]] = {} print("=" * 70) print("TEST FIDELITY REPORT") + if strict_mode: + print(" mode: --strict (baseline ignored)") + elif update_baseline: + print(" mode: --update-baseline (rewriting baseline)") + else: + print(f" mode: baseline ({BASELINE_PATH.name})") print("=" * 70) for ts_rel, py_rel in MAPPING.items(): @@ -231,6 +292,16 @@ def main() -> int: total_matched += matched total_missing += len(missing) total_absorbers += absorbers + all_missing[ts_rel] = missing + + current_missing_keys = {(d, t) for d, t, _p in missing} + baseline_keys = baseline.get(ts_rel, set()) + file_new = sorted(current_missing_keys - baseline_keys) + file_fixed = sorted(baseline_keys - current_missing_keys) + if file_new: + new_misses[ts_rel] = file_new + if file_fixed: + fixed[ts_rel] = file_fixed absorber_note = f" ({absorbers} absorbers)" if absorbers else "" status = "OK" if not missing else f"GAPS ({len(missing)})" @@ -243,7 +314,8 @@ def main() -> int: if missing: for describe, ts_name, _py_name in missing[:5]: - print(f" MISSING: [{describe}] {ts_name}") + marker = "NEW" if (describe, ts_name) in set(file_new) else "baselined" + print(f" MISSING ({marker}): [{describe}] {ts_name}") if len(missing) > 5: print(f" ... and {len(missing) - 5} more") @@ -272,10 +344,48 @@ def main() -> int: else: print(f"TOTAL: {total_matched}/{total_ts} matched ({pct}%), {total_missing} missing") - if total_missing > 0: - print("\nRun with --fix to generate stubs for missing tests.") + if update_baseline: + write_baseline(BASELINE_PATH, all_missing, total_ts) + print(f"\nBaseline written to {BASELINE_PATH}") + print(f" {total_missing} missing tests baselined across {sum(1 for v in all_missing.values() if v)} files") + return 0 + + if total_missing == 0: + print("\nAll TS tests have Python equivalents.") + if any(baseline.values()): + print("Baseline is stale — run with --update-baseline to clear it.") + return 0 + + if strict_mode: + print(f"\n{total_missing} missing (strict mode — baseline ignored).") + print("Run with --fix to generate stubs for missing tests.") return 1 - print("\nAll TS tests have Python equivalents.") + + if new_misses: + new_count = sum(len(v) for v in new_misses.values()) + print(f"\n{new_count} NEW miss(es) outside the baseline:") + for ts_rel, entries in new_misses.items(): + for describe, ts_name in entries: + print(f" - {ts_rel} :: [{describe}] {ts_name}") + print("\nOptions:") + print(" 1. Port the missing TS test(s) to the matching Python file") + print(" 2. If intentional divergence, document in docs/UPSTREAM_SYNC.md") + print(" and re-baseline with --update-baseline") + print("\nRun with --fix to generate Python stubs for missing tests.") + return 1 + + if fixed: + fixed_count = sum(len(v) for v in fixed.values()) + print(f"\n✓ {fixed_count} test(s) fixed since baseline (no longer missing):") + for _ts_rel, entries in fixed.items(): + for describe, ts_name in entries[:5]: + print(f" - [{describe}] {ts_name}") + if len(entries) > 5: + print(f" ... and {len(entries) - 5} more") + print("\nRun with --update-baseline to tighten the baseline.") + + baseline_total = sum(len(v) for v in baseline.values()) + print(f"\n{total_missing}/{baseline_total} baseline miss(es) still present — no new drift.") return 0 From 0c7006f5baf5e38faa2fd915c809d4c6d0b73973 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 24 Apr 2026 04:50:32 +0000 Subject: [PATCH 02/10] chore(pypi): add keywords + broaden classifiers for discoverability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PyPI search has no signal today — keywords field was absent, classifiers covered only Chat + Typed. Added 12 keywords covering high-volume search shapes (generic, per-platform, asyncio, vercel) and 4 classifiers (Communications parent, Internet, Libraries :: Application Frameworks, Libraries :: Python Modules) matching patterns used by slack-sdk, python-telegram-bot, discord.py, errbot. No runtime change; hatchling build output is unaffected. https://claude.ai/code/session_01WhrgpELQJJSakBnwSNuwGJ --- pyproject.toml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 5f04908..9b19b53 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,6 +2,20 @@ name = "chat-sdk" version = "0.4.26.2" description = "Multi-platform async chat SDK for Python — port of Vercel Chat" +keywords = [ + "chat", + "chatbot", + "chatops", + "slack-bot", + "discord-bot", + "telegram-bot", + "teams-bot", + "whatsapp-bot", + "bot-framework", + "async", + "asyncio", + "vercel", +] readme = "README.md" license = {text = "MIT"} requires-python = ">=3.10" @@ -16,7 +30,11 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Topic :: Communications", "Topic :: Communications :: Chat", + "Topic :: Internet", + "Topic :: Software Development :: Libraries :: Application Frameworks", + "Topic :: Software Development :: Libraries :: Python Modules", "Typing :: Typed", ] From d1fbeacd30b67b723160e7730a41cd525a209c06 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 24 Apr 2026 04:50:40 +0000 Subject: [PATCH 03/10] refactor(teams): drop duplicate _escape_table_cell, use shared (#70) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit teams/format_converter.py had a byte-identical private copy of shared/card_utils.escape_table_cell. Removed the copy, imported the shared function, renamed the 2 call sites. Also added direct unit tests for shared.card_utils.escape_table_cell + render_gfm_table — both were previously covered only transitively through adapter tests. First sub-PR of #70 (shared-helper consolidation). Scope check: most of #70's five planned helpers already live in shared/card_utils.py — only escape_table_cell (this PR) and button-style mapping (Teams + Discord, remaining work) are still duplicated per-adapter. https://claude.ai/code/session_01WhrgpELQJJSakBnwSNuwGJ --- .../adapters/teams/format_converter.py | 10 ++-- tests/test_cards.py | 49 +++++++++++++++++++ 2 files changed, 52 insertions(+), 7 deletions(-) diff --git a/src/chat_sdk/adapters/teams/format_converter.py b/src/chat_sdk/adapters/teams/format_converter.py index a96199b..23596ea 100644 --- a/src/chat_sdk/adapters/teams/format_converter.py +++ b/src/chat_sdk/adapters/teams/format_converter.py @@ -22,11 +22,7 @@ get_node_value, parse_markdown, ) - - -def _escape_table_cell(value: str) -> str: - """Escape pipe characters in table cells for GFM rendering.""" - return value.replace("\\", "\\\\").replace("|", "\\|").replace("\n", " ") +from chat_sdk.shared.card_utils import escape_table_cell class TeamsFormatConverter(BaseFormatConverter): @@ -194,11 +190,11 @@ def _table_to_gfm(self, node: Content) -> str: lines: list[str] = [] # Header row - lines.append(f"| {' | '.join(_escape_table_cell(c) for c in rows[0])} |") + lines.append(f"| {' | '.join(escape_table_cell(c) for c in rows[0])} |") # Separator separators = ["---"] * len(rows[0]) lines.append(f"| {' | '.join(separators)} |") # Data rows for row in rows[1:]: - lines.append(f"| {' | '.join(_escape_table_cell(c) for c in row)} |") + lines.append(f"| {' | '.join(escape_table_cell(c) for c in row)} |") return "\n".join(lines) diff --git a/tests/test_cards.py b/tests/test_cards.py index fe4d278..2ba7c82 100644 --- a/tests/test_cards.py +++ b/tests/test_cards.py @@ -8,6 +8,7 @@ is_card_element, table_element_to_ascii, ) +from chat_sdk.shared.card_utils import escape_table_cell, render_gfm_table class TestIsCardElement: @@ -159,3 +160,51 @@ def test_unknown_element(self): def test_button_element_returns_none(self): child = {"type": "button", "label": "Click me"} assert card_child_to_fallback_text(child) is None + + +class TestEscapeTableCell: + """Tests for shared.card_utils.escape_table_cell.""" + + def test_plain_text_passthrough(self): + assert escape_table_cell("hello world") == "hello world" + + def test_pipe_escaped(self): + assert escape_table_cell("a|b") == r"a\|b" + + def test_backslash_doubled_before_pipe_escape(self): + # Backslash must be doubled FIRST so that a literal `\|` in input + # doesn't collide with the subsequent pipe-escape. + assert escape_table_cell(r"a\b") == r"a\\b" + assert escape_table_cell(r"a\|b") == r"a\\\|b" + + def test_newline_collapsed_to_space(self): + assert escape_table_cell("line1\nline2") == "line1 line2" + + def test_multiple_substitutions(self): + assert escape_table_cell("a|b\nc\\d") == r"a\|b c\\d" + + def test_empty_string(self): + assert escape_table_cell("") == "" + + +class TestRenderGfmTable: + """Tests for shared.card_utils.render_gfm_table.""" + + def test_basic_table(self): + lines = render_gfm_table(["h1", "h2"], [["a", "b"], ["c", "d"]]) + assert lines == [ + "| h1 | h2 |", + "| --- | --- |", + "| a | b |", + "| c | d |", + ] + + def test_cells_are_escaped(self): + lines = render_gfm_table(["col"], [["pipe|inside"], ["has\nnewline"]]) + assert r"pipe\|inside" in lines[2] + assert "has newline" in lines[3] + + def test_empty_rows(self): + # No data rows — only header + separator. + lines = render_gfm_table(["only"], []) + assert lines == ["| only |", "| --- |"] From 016586f78904bd43dc9d5379cf5358718caa3729 Mon Sep 17 00:00:00 2001 From: patrick-chinchill Date: Fri, 24 Apr 2026 03:27:47 -0700 Subject: [PATCH 04/10] ci(fidelity): fail script when mapped TS files are missing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously `verify_test_fidelity.py` printed "SKIPPED (file not found)" for any mapped TS test whose source didn't exist under `TS_ROOT`, then summed 0 matches + 0 missing and exited 0 with "All TS tests have Python equivalents." Combined with `continue-on-error: true` on the upstream-clone step, that made a silently-failing clone report "fidelity check passed" in CI. Now the script tracks missing-TS-file hits separately from real successes. If any mapped TS file is absent at end-of-run, the script prints a clear "upstream checkout missing — cannot verify fidelity" message naming every missing path, includes the clone command hint, and exits 1. This fires before strict/baseline/update-baseline success branches so no mode can accidentally mask it. Reproducer: TS_ROOT=/tmp/definitely-missing uv run python scripts/verify_test_fidelity.py Before: exit 0. After: exit 1 with infra-level error message. Closes self-review gap #1 on #72. --- scripts/verify_test_fidelity.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/scripts/verify_test_fidelity.py b/scripts/verify_test_fidelity.py index 32d16c6..6daf141 100644 --- a/scripts/verify_test_fidelity.py +++ b/scripts/verify_test_fidelity.py @@ -266,6 +266,7 @@ def main() -> int: all_missing: dict[str, list] = {} new_misses: dict[str, list[tuple[str, str]]] = {} fixed: dict[str, list[tuple[str, str]]] = {} + missing_ts_files: list[str] = [] print("=" * 70) print("TEST FIDELITY REPORT") @@ -280,7 +281,8 @@ def main() -> int: for ts_rel, py_rel in MAPPING.items(): ts_path = os.path.join(TS_ROOT, ts_rel) if not os.path.exists(ts_path): - print(f"\n{ts_rel} — SKIPPED (file not found)") + print(f"\n{ts_rel} — MISSING (upstream TS file not found at {ts_path})") + missing_ts_files.append(ts_path) continue ts_tests = extract_ts_tests(ts_path) @@ -344,6 +346,24 @@ def main() -> int: else: print(f"TOTAL: {total_matched}/{total_ts} matched ({pct}%), {total_missing} missing") + # Infra guard: if any mapped TS file is missing, we cannot verify fidelity. + # Do NOT treat this as success — a failed upstream clone would otherwise + # silently pass CI. Fail loudly before any downstream success branches. + if missing_ts_files: + print( + f"\nupstream checkout missing — cannot verify fidelity. " + f"{len(missing_ts_files)} mapped TS file(s) not found under TS_ROOT={TS_ROOT!r}:" + ) + for path in missing_ts_files: + print(f" - {path}") + print( + "\nClone the upstream repo at the pinned parity tag, e.g.:\n" + " git clone --depth 1 --branch chat@4.26.0 " + "https://github.com/vercel/chat.git /tmp/vercel-chat\n" + "then re-run with TS_ROOT=/tmp/vercel-chat." + ) + return 1 + if update_baseline: write_baseline(BASELINE_PATH, all_missing, total_ts) print(f"\nBaseline written to {BASELINE_PATH}") From 4f7d0cfdbbde8a01062cd41551fa0e1a1d923ce7 Mon Sep 17 00:00:00 2001 From: patrick-chinchill Date: Fri, 24 Apr 2026 03:27:56 -0700 Subject: [PATCH 05/10] ci(fidelity): drop continue-on-error on clone, run --strict with empty baseline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two infra-level changes to the fidelity check: 1. `.github/workflows/lint.yml` — the `Clone upstream vercel/chat at pinned parity tag` step no longer carries `continue-on-error: true`. The clone is infrastructure the fidelity check depends on; if it fails, the job should fail there rather than swallow the failure and hope a later step catches it. Combined with the script-level guard from the previous commit, this is defense in depth. 2. CI now runs `scripts/verify_test_fidelity.py --strict`. Every `[post with Plan]` test that was baselined is now ported (PR #75 and PR #74 in the 0.4.26.2 bundle), so the repo ships at 0 missing. `scripts/fidelity_baseline.json` is reduced to `{"missing": {}}` (metadata retained so `--update-baseline` and the documented workflow still function for future upstream syncs). Closes self-review gap #2 on #72. --- .github/workflows/lint.yml | 5 +-- scripts/fidelity_baseline.json | 75 ++-------------------------------- 2 files changed, 6 insertions(+), 74 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 2287b3f..ebd4f22 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -59,17 +59,16 @@ jobs: - name: Clone upstream vercel/chat at pinned parity tag id: clone_upstream - continue-on-error: true run: | git clone --depth 1 --branch chat@4.26.0 \ https://github.com/vercel/chat.git /tmp/vercel-chat - - name: Test fidelity check (baseline-enforced) + - name: Test fidelity check (strict — zero missing) id: fidelity continue-on-error: true env: TS_ROOT: /tmp/vercel-chat - run: uv run python scripts/verify_test_fidelity.py + run: uv run python scripts/verify_test_fidelity.py --strict - name: Pyrefly type check id: pyrefly diff --git a/scripts/fidelity_baseline.json b/scripts/fidelity_baseline.json index a776e2b..216976f 100644 --- a/scripts/fidelity_baseline.json +++ b/scripts/fidelity_baseline.json @@ -1,74 +1,7 @@ { - "_comment": "Ratchet-down baseline for scripts/verify_test_fidelity.py. Each entry is a [describe_block, ts_it_name] pair that is known to be missing a Python translation. Default CI mode accepts any subset of this list as missing and fails on new misses outside it. To remove entries: port the TS test to its Python counterpart, then regenerate this file with --update-baseline.", + "_comment": "Ratchet-down baseline for scripts/verify_test_fidelity.py. This repo ships at strict fidelity (0 missing) against chat@4.26.0, so the baseline is empty. Default CI mode now runs --strict via .github/workflows/lint.yml; this file is retained for local workflows that want to opt back into baseline mode (e.g. during an upstream sync where several ports land in flight). To baseline genuinely-divergent tests, run scripts/verify_test_fidelity.py --update-baseline after documenting the divergence in docs/UPSTREAM_SYNC.md.", "ts_parity": "chat@4.26.0", - "total_ts_tests": 564, - "total_missing": 16, - "missing": { - "packages/chat/src/thread.test.ts": [ - [ - "post with Plan", - "should add tasks and call editObject" - ], - [ - "post with Plan", - "should call adapter postObject when supported" - ], - [ - "post with Plan", - "should complete plan and mark tasks done" - ], - [ - "post with Plan", - "should complete plan via editMessage in fallback mode" - ], - [ - "post with Plan", - "should continue accepting edits after a failed edit" - ], - [ - "post with Plan", - "should ensure sequential edits via queue" - ], - [ - "post with Plan", - "should handle various PlanContent formats in initialMessage" - ], - [ - "post with Plan", - "should post fallback text when adapter does not support plans" - ], - [ - "post with Plan", - "should propagate editObject errors from addTask" - ], - [ - "post with Plan", - "should reset plan and start fresh" - ], - [ - "post with Plan", - "should return null when calling addTask before post" - ], - [ - "post with Plan", - "should return null when calling complete before post" - ], - [ - "post with Plan", - "should return null when calling updateTask before post" - ], - [ - "post with Plan", - "should set error status via updateTask" - ], - [ - "post with Plan", - "should update current task with output" - ], - [ - "post with Plan", - "should update via editMessage in fallback mode" - ] - ] - } + "total_ts_tests": 588, + "total_missing": 0, + "missing": {} } From 404bf3e3cbcc9f6ac8e6e06ba8d9b1fd8a032c4c Mon Sep 17 00:00:00 2001 From: patrick-chinchill Date: Fri, 24 Apr 2026 03:28:03 -0700 Subject: [PATCH 06/10] docs: reflect strict fidelity mode + add CHANGELOG entry for 0.4.26.2 - CLAUDE.md fidelity section: document that CI runs `--strict` and that the script now fails loudly on a missing upstream checkout. Baseline mode is retained but described as the local-dev fallback rather than the CI default. - docs/UPSTREAM_SYNC.md: same update, plus clarify the two infra guardrails (no `continue-on-error` on the clone, script-level missing-TS-file check) and rewrite the workflow table for strict mode as the default. - CHANGELOG.md: add a `### CI / Internals` subsection under the unreleased 0.4.26.2 entry describing the fidelity-check wiring, the two infra guardrails, and the empty baseline. Closes #53, #72. --- CHANGELOG.md | 9 +++++++++ CLAUDE.md | 10 ++++++---- docs/UPSTREAM_SYNC.md | 34 ++++++++++++++++++---------------- 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c664d14..07b45cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -95,6 +95,15 @@ Parity catch-up with upstream `4.26.0`. No upstream version change. (`test_memory_state.py`, `test_state_postgres.py`). Closes the same flaky-test hazard fixed for the Redis backend in PR #73. +### CI / Internals + +- `verify_test_fidelity.py` now enforces against upstream on every PR + (`.github/workflows/lint.yml`); fails when the upstream clone is missing + or when any mapped TS file can't be found. Workflow runs `--strict` and + the clone step no longer carries `continue-on-error: true`, so infra + failures surface immediately at the job level. Baseline shipped empty + (all previously-missing tests ported in this release). Closes #53, #72. + ## 0.4.26.1 (2026-04-23) Python-only follow-up on `0.4.26`. Still alpha — APIs may change. diff --git a/CLAUDE.md b/CLAUDE.md index 6b7928e..26f772b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -106,7 +106,9 @@ will not pass CI. **Fidelity check** (`scripts/verify_test_fidelity.py`) verifies every TS `it("...")` has a matching Python `def test_*()`, pinned to `chat@4.26.0`. -Default mode is baseline-enforced: CI fails on any NEW miss not listed in -`scripts/fidelity_baseline.json`. Run `--update-baseline` after porting a -missing test (or documenting an intentional skip in `UPSTREAM_SYNC.md`). Use -`--strict` to verify the final 0-missing target locally. +**CI runs `--strict`** (see `.github/workflows/lint.yml`): any missing +translation fails the build, and a missing upstream checkout also fails +(the script exits non-zero when any mapped TS file isn't found). Baseline +mode (the default without `--strict`) is retained for local workflows +where a few ports land in flight — regenerate via `--update-baseline` +after documenting intentional divergence in `docs/UPSTREAM_SYNC.md`. diff --git a/docs/UPSTREAM_SYNC.md b/docs/UPSTREAM_SYNC.md index d0400d1..a73366b 100644 --- a/docs/UPSTREAM_SYNC.md +++ b/docs/UPSTREAM_SYNC.md @@ -72,31 +72,33 @@ tests. If upstream tests lock in inconsistent behavior, choose one of: - **Preserve parity** and document the inconsistency in the non-parity section below - **Intentionally diverge** and document the divergence in the non-parity section -### Test fidelity baseline +### Test fidelity (strict mode) `scripts/verify_test_fidelity.py` runs in CI (`.github/workflows/lint.yml`) pinned to `vercel/chat@4.26.0` (matches the `UPSTREAM_PARITY` constant in -`src/chat_sdk/__init__.py`). Default mode is **baseline-enforced**: +`src/chat_sdk/__init__.py`). **CI runs `--strict`** — the repo ships at 0 +missing as of `0.4.26.2` and the baseline (`scripts/fidelity_baseline.json`) +is empty. -- The current set of missing TS-test translations lives in - `scripts/fidelity_baseline.json`. -- CI fails if a TS test is missing that is **not** in the baseline (new drift). -- CI succeeds if all currently-missing tests are a subset of the baseline — even - if nothing has been ported yet. -- Fixed tests (in baseline but now ported) are reported with a reminder to - tighten the baseline. +Infra guardrails: + +- The workflow's `Clone upstream vercel/chat at pinned parity tag` step does + **not** use `continue-on-error` — a failed clone aborts the job loudly. +- The script itself fails with exit 1 if any mapped TS file is missing under + `TS_ROOT` (defense in depth against silent skips). Workflows: | Goal | Command | |------|---------| -| Port a missing test | Write the Python test, then `--update-baseline` to remove it from the ratchet | -| Add a Python-only divergence (intentional skip) | Document in [Known Non-Parity](#known-non-parity-with-typescript-sdk), then `--update-baseline` | -| Upstream sync | After pulling new upstream, run default mode — any newly-added TS tests appear as NEW misses and CI fails until ported or baselined | -| Final parity check | `--strict` ignores the baseline and fails on any missing — target once baseline hits zero | - -The baseline file is ordered and stable so diffs are easy to review. Regenerate -it whenever the missing set changes — don't hand-edit. +| Port a missing test | Write the Python test and land it; CI rejects anything that re-introduces a gap | +| Add a Python-only divergence (intentional skip) | Document in [Known Non-Parity](#known-non-parity-with-typescript-sdk), then `--update-baseline` and switch the workflow back to non-strict default for that file if truly unavoidable | +| Upstream sync | After pulling new upstream, run `--strict` — newly-added TS tests appear as missing and CI fails until ported | +| Final parity check | Same as CI: `TS_ROOT=/tmp/vercel-chat uv run python scripts/verify_test_fidelity.py --strict` | + +Baseline mode (the default without `--strict`) is retained for local +development where a few ports land in flight. Regenerate the baseline via +`--update-baseline` rather than hand-editing. ## Divergence Policy From 9400d3178dce55b9b97dbd4d65feba7720b48d2e Mon Sep 17 00:00:00 2001 From: patrick-chinchill Date: Fri, 24 Apr 2026 04:13:12 -0700 Subject: [PATCH 07/10] fix(fidelity): address coderabbit review on PR #72 - CLAUDE.md: add local clone command to the quick reference so the "fails if missing" rule is actionable - verify_test_fidelity.py docstring/baseline-comment: reflect strict-mode CI contract (was still describing baseline mode) - load_baseline: validate ts_parity against UPSTREAM_PARITY so a stale baseline can't silently mask upstream drift - argparse: reject --strict + --update-baseline together (exit 2) Co-Authored-By: Claude Opus 4.7 (1M context) --- CLAUDE.md | 8 ++++ scripts/verify_test_fidelity.py | 76 +++++++++++++++++++++++++++------ 2 files changed, 71 insertions(+), 13 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 26f772b..97e0ea0 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -112,3 +112,11 @@ translation fails the build, and a missing upstream checkout also fails mode (the default without `--strict`) is retained for local workflows where a few ports land in flight — regenerate via `--update-baseline` after documenting intentional divergence in `docs/UPSTREAM_SYNC.md`. + +Before the fidelity check can run locally, clone the pinned upstream +checkout (same command CI uses in `lint.yml`): +```bash +git clone --depth 1 --branch chat@4.26.0 \ + https://github.com/vercel/chat.git /tmp/vercel-chat +``` +Then `TS_ROOT=/tmp/vercel-chat uv run python scripts/verify_test_fidelity.py --strict`. diff --git a/scripts/verify_test_fidelity.py b/scripts/verify_test_fidelity.py index 6daf141..38db3f7 100644 --- a/scripts/verify_test_fidelity.py +++ b/scripts/verify_test_fidelity.py @@ -6,17 +6,22 @@ Python translation. Usage: - python scripts/verify_test_fidelity.py # baseline mode (default) - python scripts/verify_test_fidelity.py --strict # fail on any missing + python scripts/verify_test_fidelity.py --strict # CI path: fail on any missing + python scripts/verify_test_fidelity.py # baseline mode (local opt-in) python scripts/verify_test_fidelity.py --fix # append stubs for missing python scripts/verify_test_fidelity.py --update-baseline # rewrite baseline -Default (baseline) mode: succeeds iff the set of missing tests is a subset of -``scripts/fidelity_baseline.json``. Tests that are in the baseline but now pass -are reported as fixed. New misses outside the baseline fail CI. - -``--strict`` ignores the baseline and fails on any missing. This is the -eventual target once the baseline count ratchets to zero. +``--strict`` is the current CI contract (see ``.github/workflows/lint.yml``): +the baseline is ignored and any missing translation — or a missing upstream +checkout — fails the build. This repo ships at zero missing against +``chat@4.26.0``. + +Baseline mode (the default without ``--strict``) is retained for local +workflows where a few ports land in flight: it succeeds iff the set of +missing tests is a subset of ``scripts/fidelity_baseline.json``. Tests that +are in the baseline but now pass are reported as fixed; new misses outside +the baseline fail. Regenerate via ``--update-baseline`` after documenting +intentional divergence in ``docs/UPSTREAM_SYNC.md``. """ import json @@ -215,12 +220,46 @@ def count_absorbers(py_path: str) -> int: return count +def _current_parity_tag() -> str | None: + """Return the baseline-format parity tag (``chat@X.Y.Z``) for the current repo. + + Reads ``UPSTREAM_PARITY`` from ``src/chat_sdk/__init__.py`` without + importing the package (avoids pulling optional runtime deps during a + script run). Returns None if the constant can't be located. + """ + init_path = Path(__file__).parent.parent / "src" / "chat_sdk" / "__init__.py" + if not init_path.exists(): + return None + with open(init_path) as f: + content = f.read() + m = re.search(r'^UPSTREAM_PARITY\s*=\s*"([^"]+)"', content, re.MULTILINE) + if not m: + return None + return f"chat@{m.group(1)}" + + def load_baseline(path: Path) -> dict[str, set[tuple[str, str]]]: - """Load fidelity baseline. Missing file returns empty baseline.""" + """Load fidelity baseline. Missing file returns empty baseline. + + Exits with code 1 when the baseline's ``ts_parity`` disagrees with the + current ``UPSTREAM_PARITY`` constant — a stale baseline could otherwise + silently mask upstream drift after a version bump. + """ if not path.exists(): return {} with open(path) as f: data = json.load(f) + baseline_parity = data.get("ts_parity") + current_parity = _current_parity_tag() + if baseline_parity and current_parity and baseline_parity != current_parity: + print( + f"\nbaseline parity mismatch: {path.name} was generated for " + f"upstream {baseline_parity}, but current parity is " + f"{current_parity} — re-run with `--update-baseline` after " + f"confirming the diff.", + file=sys.stderr, + ) + sys.exit(1) out: dict[str, set[tuple[str, str]]] = {} for ts_rel, entries in data.get("missing", {}).items(): out[ts_rel] = {(e[0], e[1]) for e in entries} @@ -233,10 +272,12 @@ def write_baseline(path: Path, all_missing: dict[str, list], total_ts: int) -> N "_comment": ( "Ratchet-down baseline for scripts/verify_test_fidelity.py. " "Each entry is a [describe_block, ts_it_name] pair that is known " - "to be missing a Python translation. Default CI mode accepts any " - "subset of this list as missing and fails on new misses outside " - "it. To remove entries: port the TS test to its Python counterpart, " - "then regenerate this file with --update-baseline." + "to be missing a Python translation. CI runs --strict (see " + ".github/workflows/lint.yml) and ignores this file; baseline " + "mode is a local-dev opt-in that accepts any subset of this " + "list as missing and fails on new misses outside it. To remove " + "entries: port the TS test to its Python counterpart, then " + "regenerate this file with --update-baseline." ), "ts_parity": "chat@4.26.0", "total_ts_tests": total_ts, @@ -257,6 +298,15 @@ def main() -> int: strict_mode = "--strict" in sys.argv update_baseline = "--update-baseline" in sys.argv + if strict_mode and update_baseline: + print( + "error: --strict and --update-baseline are mutually exclusive.\n" + " --strict says 'no missing allowed'; --update-baseline says " + "'snapshot whatever is missing into the allowlist'. Pick one.", + file=sys.stderr, + ) + return 2 + baseline = {} if (strict_mode or update_baseline) else load_baseline(BASELINE_PATH) total_missing = 0 From 7b27e2d4ee5fb9c0714c8350b1296b5eadcf8e5b Mon Sep 17 00:00:00 2001 From: patrick-chinchill Date: Fri, 24 Apr 2026 10:41:40 -0700 Subject: [PATCH 08/10] fix(fidelity): preserve baseline _comment + qualify scope-of-coverage claim Addresses 2 medium findings from local adversarial review on #72: - write_baseline: preserve existing _comment rather than overwriting with boilerplate on each --update-baseline run. - CHANGELOG / CLAUDE.md / fidelity_baseline.json: "strict fidelity" now qualified as "strict fidelity for mapped core files" since MAPPING covers 8 of 17 packages/chat/src/*.test.ts files. Follow-ups filed for (a) MAPPING expansion to full core-package scope, (b) pinning upstream clone SHA in lint.yml, (c) tightening fuzzy_match against hyphen-stripping. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 5 +++- CLAUDE.md | 18 +++++++++------ docs/UPSTREAM_SYNC.md | 8 +++++-- scripts/fidelity_baseline.json | 2 +- scripts/verify_test_fidelity.py | 41 ++++++++++++++++++++++++--------- 5 files changed, 52 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 07b45cd..ef13737 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -102,7 +102,10 @@ Parity catch-up with upstream `4.26.0`. No upstream version change. or when any mapped TS file can't be found. Workflow runs `--strict` and the clone step no longer carries `continue-on-error: true`, so infra failures surface immediately at the job level. Baseline shipped empty - (all previously-missing tests ported in this release). Closes #53, #72. + (all previously-missing tests ported in this release) — strict fidelity + for *mapped core files* (8 of 17 `packages/chat/src/*.test.ts` files; + see the `MAPPING` dict in `scripts/verify_test_fidelity.py` for the + authoritative scope list). Closes #53, #72. ## 0.4.26.1 (2026-04-23) diff --git a/CLAUDE.md b/CLAUDE.md index 97e0ea0..0b9772c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -105,13 +105,17 @@ async mock bugs, and cross-file duplicates. PRs that introduce hard failures will not pass CI. **Fidelity check** (`scripts/verify_test_fidelity.py`) verifies every TS -`it("...")` has a matching Python `def test_*()`, pinned to `chat@4.26.0`. -**CI runs `--strict`** (see `.github/workflows/lint.yml`): any missing -translation fails the build, and a missing upstream checkout also fails -(the script exits non-zero when any mapped TS file isn't found). Baseline -mode (the default without `--strict`) is retained for local workflows -where a few ports land in flight — regenerate via `--update-baseline` -after documenting intentional divergence in `docs/UPSTREAM_SYNC.md`. +`it("...")` in the mapped core files has a matching Python `def test_*()`, +pinned to `chat@4.26.0`. The `MAPPING` dict in that script is the +authoritative scope list — it currently covers 8 of 17 +`packages/chat/src/*.test.ts` files (extending it is tracked as a +follow-up). **CI runs `--strict`** (see `.github/workflows/lint.yml`): +any missing translation in a mapped file fails the build, and a missing +upstream checkout also fails (the script exits non-zero when any mapped +TS file isn't found). Baseline mode (the default without `--strict`) is +retained for local workflows where a few ports land in flight — +regenerate via `--update-baseline` after documenting intentional +divergence in `docs/UPSTREAM_SYNC.md`. Before the fidelity check can run locally, clone the pinned upstream checkout (same command CI uses in `lint.yml`): diff --git a/docs/UPSTREAM_SYNC.md b/docs/UPSTREAM_SYNC.md index a73366b..a905793 100644 --- a/docs/UPSTREAM_SYNC.md +++ b/docs/UPSTREAM_SYNC.md @@ -77,8 +77,12 @@ tests. If upstream tests lock in inconsistent behavior, choose one of: `scripts/verify_test_fidelity.py` runs in CI (`.github/workflows/lint.yml`) pinned to `vercel/chat@4.26.0` (matches the `UPSTREAM_PARITY` constant in `src/chat_sdk/__init__.py`). **CI runs `--strict`** — the repo ships at 0 -missing as of `0.4.26.2` and the baseline (`scripts/fidelity_baseline.json`) -is empty. +missing *for mapped core files* as of `0.4.26.2` and the baseline +(`scripts/fidelity_baseline.json`) is empty. Scope is defined by the +`MAPPING` dict in the script: 8 of 17 `packages/chat/src/*.test.ts` files +today (extending to the remaining 9 is tracked as a follow-up). Unmapped +files are not checked — tightening scope requires editing `MAPPING` and +re-running `--strict`. Infra guardrails: diff --git a/scripts/fidelity_baseline.json b/scripts/fidelity_baseline.json index 216976f..d9cfc42 100644 --- a/scripts/fidelity_baseline.json +++ b/scripts/fidelity_baseline.json @@ -1,5 +1,5 @@ { - "_comment": "Ratchet-down baseline for scripts/verify_test_fidelity.py. This repo ships at strict fidelity (0 missing) against chat@4.26.0, so the baseline is empty. Default CI mode now runs --strict via .github/workflows/lint.yml; this file is retained for local workflows that want to opt back into baseline mode (e.g. during an upstream sync where several ports land in flight). To baseline genuinely-divergent tests, run scripts/verify_test_fidelity.py --update-baseline after documenting the divergence in docs/UPSTREAM_SYNC.md.", + "_comment": "Ratchet-down baseline for scripts/verify_test_fidelity.py. This repo ships at strict fidelity for mapped core files (0 missing) against chat@4.26.0, so the baseline is empty. Scope: the MAPPING dict in scripts/verify_test_fidelity.py is the authoritative list of TS files checked; it currently covers 8 of the 17 packages/chat/src/*.test.ts files. Default CI mode runs --strict via .github/workflows/lint.yml; this file is retained for local workflows that want to opt back into baseline mode (e.g. during an upstream sync where several ports land in flight). To baseline genuinely-divergent tests, run scripts/verify_test_fidelity.py --update-baseline after documenting the divergence in docs/UPSTREAM_SYNC.md.", "ts_parity": "chat@4.26.0", "total_ts_tests": 588, "total_missing": 0, diff --git a/scripts/verify_test_fidelity.py b/scripts/verify_test_fidelity.py index 38db3f7..5227c81 100644 --- a/scripts/verify_test_fidelity.py +++ b/scripts/verify_test_fidelity.py @@ -266,19 +266,38 @@ def load_baseline(path: Path) -> dict[str, set[tuple[str, str]]]: return out +_DEFAULT_BASELINE_COMMENT = ( + "Ratchet-down baseline for scripts/verify_test_fidelity.py. " + "Each entry is a [describe_block, ts_it_name] pair that is known " + "to be missing a Python translation. CI runs --strict (see " + ".github/workflows/lint.yml) and ignores this file; baseline " + "mode is a local-dev opt-in that accepts any subset of this " + "list as missing and fails on new misses outside it. To remove " + "entries: port the TS test to its Python counterpart, then " + "regenerate this file with --update-baseline." +) + + def write_baseline(path: Path, all_missing: dict[str, list], total_ts: int) -> None: - """Persist the current set of missing tests as the new baseline.""" + """Persist the current set of missing tests as the new baseline. + + If ``path`` already exists and has a ``_comment`` field, that curated + comment is preserved so hand-written context (e.g. scope qualifiers, + shipping-posture notes) isn't silently overwritten on every + ``--update-baseline`` run. Only fresh files get the default boilerplate. + """ + existing_comment: str | None = None + if path.exists(): + try: + with open(path) as f: + existing = json.load(f) + if isinstance(existing.get("_comment"), str): + existing_comment = existing["_comment"] + except (OSError, json.JSONDecodeError): + existing_comment = None + payload = { - "_comment": ( - "Ratchet-down baseline for scripts/verify_test_fidelity.py. " - "Each entry is a [describe_block, ts_it_name] pair that is known " - "to be missing a Python translation. CI runs --strict (see " - ".github/workflows/lint.yml) and ignores this file; baseline " - "mode is a local-dev opt-in that accepts any subset of this " - "list as missing and fails on new misses outside it. To remove " - "entries: port the TS test to its Python counterpart, then " - "regenerate this file with --update-baseline." - ), + "_comment": existing_comment if existing_comment is not None else _DEFAULT_BASELINE_COMMENT, "ts_parity": "chat@4.26.0", "total_ts_tests": total_ts, "total_missing": sum(len(v) for v in all_missing.values()), From 638a75928915d012daf4298aed0196cf2d1c300f Mon Sep 17 00:00:00 2001 From: patrick-chinchill Date: Fri, 24 Apr 2026 10:49:21 -0700 Subject: [PATCH 09/10] docs(fidelity): finish scope-qualification sweep on PR #72 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round-2 local review caught 3 spots the round-1 sweep missed: - script module docstring still said "zero missing against chat@4.26.0" unqualified (renders in --help and source) - lint.yml step title "Test fidelity check (strict — zero missing)" renders unqualified in the Actions UI - CHANGELOG entry had "Closes #53, #72"; a PR can't close itself Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/lint.yml | 2 +- CHANGELOG.md | 2 +- scripts/verify_test_fidelity.py | 7 +++++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index ebd4f22..f490d45 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -63,7 +63,7 @@ jobs: git clone --depth 1 --branch chat@4.26.0 \ https://github.com/vercel/chat.git /tmp/vercel-chat - - name: Test fidelity check (strict — zero missing) + - name: Test fidelity check (strict — zero missing in mapped core files) id: fidelity continue-on-error: true env: diff --git a/CHANGELOG.md b/CHANGELOG.md index ef13737..8fd71da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -105,7 +105,7 @@ Parity catch-up with upstream `4.26.0`. No upstream version change. (all previously-missing tests ported in this release) — strict fidelity for *mapped core files* (8 of 17 `packages/chat/src/*.test.ts` files; see the `MAPPING` dict in `scripts/verify_test_fidelity.py` for the - authoritative scope list). Closes #53, #72. + authoritative scope list). Closes #53. ## 0.4.26.1 (2026-04-23) diff --git a/scripts/verify_test_fidelity.py b/scripts/verify_test_fidelity.py index 5227c81..b41f429 100644 --- a/scripts/verify_test_fidelity.py +++ b/scripts/verify_test_fidelity.py @@ -13,8 +13,11 @@ ``--strict`` is the current CI contract (see ``.github/workflows/lint.yml``): the baseline is ignored and any missing translation — or a missing upstream -checkout — fails the build. This repo ships at zero missing against -``chat@4.26.0``. +checkout — fails the build. This repo ships at strict fidelity for mapped +core files (0 missing) against ``chat@4.26.0``. The ``MAPPING`` dict below +is the authoritative scope list; it currently covers 8 of the 17 +``packages/chat/src/*.test.ts`` files (extending it is tracked as a +follow-up). Baseline mode (the default without ``--strict``) is retained for local workflows where a few ports land in flight: it succeeds iff the set of From 9b3648c8ce5653b21ee27341d56435d23ff45a30 Mon Sep 17 00:00:00 2001 From: patrick-chinchill Date: Fri, 24 Apr 2026 10:56:07 -0700 Subject: [PATCH 10/10] fix(fidelity): use UPSTREAM_PARITY for write_baseline + scope-qualify default comment Round-3 local review caught two gaps: - write_baseline hardcoded ts_parity="chat@4.26.0". Would self-trap on the next upstream sync (regenerated baseline carries stale tag -> next run fails with re-run-update-baseline advice user just ran). Now reads _current_parity_tag(). - _DEFAULT_BASELINE_COMMENT didn't mention scope framing. Every other user-facing surface carries the "mapped core files / 8 of 17 / MAPPING" language; rm-ing the baseline and regenerating silently dropped it. Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/verify_test_fidelity.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/scripts/verify_test_fidelity.py b/scripts/verify_test_fidelity.py index b41f429..63f8acc 100644 --- a/scripts/verify_test_fidelity.py +++ b/scripts/verify_test_fidelity.py @@ -270,14 +270,19 @@ def load_baseline(path: Path) -> dict[str, set[tuple[str, str]]]: _DEFAULT_BASELINE_COMMENT = ( - "Ratchet-down baseline for scripts/verify_test_fidelity.py. " - "Each entry is a [describe_block, ts_it_name] pair that is known " - "to be missing a Python translation. CI runs --strict (see " - ".github/workflows/lint.yml) and ignores this file; baseline " - "mode is a local-dev opt-in that accepts any subset of this " - "list as missing and fails on new misses outside it. To remove " - "entries: port the TS test to its Python counterpart, then " - "regenerate this file with --update-baseline." + "Ratchet-down baseline for scripts/verify_test_fidelity.py. This " + "repo ships at strict fidelity for mapped core files (0 missing) " + "against the current UPSTREAM_PARITY tag, so the baseline is " + "normally empty. Scope: the MAPPING dict in " + "scripts/verify_test_fidelity.py is the authoritative list of TS " + "files checked; it currently covers 8 of the 17 " + "packages/chat/src/*.test.ts files. Default CI mode runs --strict " + "via .github/workflows/lint.yml; this file is retained for local " + "workflows that want to opt back into baseline mode (e.g. during " + "an upstream sync where several ports land in flight). To " + "baseline genuinely-divergent tests, run " + "scripts/verify_test_fidelity.py --update-baseline after " + "documenting the divergence in docs/UPSTREAM_SYNC.md." ) @@ -299,9 +304,14 @@ def write_baseline(path: Path, all_missing: dict[str, list], total_ts: int) -> N except (OSError, json.JSONDecodeError): existing_comment = None + # Derive ts_parity from UPSTREAM_PARITY so a fresh regen after an + # upstream version bump doesn't self-trap on a stale literal. Fall + # back to the last-known literal only if UPSTREAM_PARITY can't be + # read (e.g. __init__.py missing during an in-flight refactor). + current_parity = _current_parity_tag() payload = { "_comment": existing_comment if existing_comment is not None else _DEFAULT_BASELINE_COMMENT, - "ts_parity": "chat@4.26.0", + "ts_parity": current_parity if current_parity is not None else "chat@4.26.0", "total_ts_tests": total_ts, "total_missing": sum(len(v) for v in all_missing.values()), "missing": {