diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index d6f0771..f490d45 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -57,6 +57,19 @@ jobs: continue-on-error: true run: uv run python scripts/audit_test_quality.py + - name: Clone upstream vercel/chat at pinned parity tag + id: clone_upstream + run: | + git clone --depth 1 --branch chat@4.26.0 \ + https://github.com/vercel/chat.git /tmp/vercel-chat + + - name: Test fidelity check (strict — zero missing in mapped core files) + id: fidelity + continue-on-error: true + env: + TS_ROOT: /tmp/vercel-chat + run: uv run python scripts/verify_test_fidelity.py --strict + - name: Pyrefly type check id: pyrefly continue-on-error: true @@ -75,6 +88,7 @@ jobs: echo "| Ruff check | ${{ steps.ruff_check.outcome }} |" >> $GITHUB_STEP_SUMMARY echo "| Ruff format | ${{ steps.ruff_format.outcome }} |" >> $GITHUB_STEP_SUMMARY echo "| Test audit | ${{ steps.audit.outcome }} |" >> $GITHUB_STEP_SUMMARY + echo "| Test fidelity | ${{ steps.fidelity.outcome }} |" >> $GITHUB_STEP_SUMMARY echo "| Pyrefly | ${{ steps.pyrefly.outcome }} |" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY if [ "${{ steps.pyrefly.outcome }}" = "success" ]; then @@ -89,10 +103,11 @@ jobs: RUFF_CHECK: ${{ steps.ruff_check.outcome }} RUFF_FORMAT: ${{ steps.ruff_format.outcome }} AUDIT: ${{ steps.audit.outcome }} + FIDELITY: ${{ steps.fidelity.outcome }} PYREFLY: ${{ steps.pyrefly.outcome }} run: | failures=0 - for var in RUFF_CHECK RUFF_FORMAT AUDIT PYREFLY; do + for var in RUFF_CHECK RUFF_FORMAT AUDIT FIDELITY PYREFLY; do outcome="${!var}" if [ "$outcome" != "success" ]; then echo "$var failed (outcome: $outcome)" diff --git a/CHANGELOG.md b/CHANGELOG.md index c664d14..8fd71da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -95,6 +95,18 @@ Parity catch-up with upstream `4.26.0`. No upstream version change. (`test_memory_state.py`, `test_state_postgres.py`). Closes the same flaky-test hazard fixed for the Redis backend in PR #73. +### CI / Internals + +- `verify_test_fidelity.py` now enforces against upstream on every PR + (`.github/workflows/lint.yml`); fails when the upstream clone is missing + or when any mapped TS file can't be found. Workflow runs `--strict` and + the clone step no longer carries `continue-on-error: true`, so infra + failures surface immediately at the job level. Baseline shipped empty + (all previously-missing tests ported in this release) — strict fidelity + for *mapped core files* (8 of 17 `packages/chat/src/*.test.ts` files; + see the `MAPPING` dict in `scripts/verify_test_fidelity.py` for the + authoritative scope list). Closes #53. + ## 0.4.26.1 (2026-04-23) Python-only follow-up on `0.4.26`. Still alpha — APIs may change. diff --git a/CLAUDE.md b/CLAUDE.md index f838b86..0b9772c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -105,5 +105,22 @@ async mock bugs, and cross-file duplicates. PRs that introduce hard failures will not pass CI. **Fidelity check** (`scripts/verify_test_fidelity.py`) verifies every TS -`it("...")` has a matching Python `def test_*()`. Must show 0 missing before -committing test changes. +`it("...")` in the mapped core files has a matching Python `def test_*()`, +pinned to `chat@4.26.0`. The `MAPPING` dict in that script is the +authoritative scope list — it currently covers 8 of 17 +`packages/chat/src/*.test.ts` files (extending it is tracked as a +follow-up). **CI runs `--strict`** (see `.github/workflows/lint.yml`): +any missing translation in a mapped file fails the build, and a missing +upstream checkout also fails (the script exits non-zero when any mapped +TS file isn't found). Baseline mode (the default without `--strict`) is +retained for local workflows where a few ports land in flight — +regenerate via `--update-baseline` after documenting intentional +divergence in `docs/UPSTREAM_SYNC.md`. + +Before the fidelity check can run locally, clone the pinned upstream +checkout (same command CI uses in `lint.yml`): +```bash +git clone --depth 1 --branch chat@4.26.0 \ + https://github.com/vercel/chat.git /tmp/vercel-chat +``` +Then `TS_ROOT=/tmp/vercel-chat uv run python scripts/verify_test_fidelity.py --strict`. diff --git a/docs/UPSTREAM_SYNC.md b/docs/UPSTREAM_SYNC.md index 1d60574..a905793 100644 --- a/docs/UPSTREAM_SYNC.md +++ b/docs/UPSTREAM_SYNC.md @@ -72,6 +72,38 @@ tests. If upstream tests lock in inconsistent behavior, choose one of: - **Preserve parity** and document the inconsistency in the non-parity section below - **Intentionally diverge** and document the divergence in the non-parity section +### Test fidelity (strict mode) + +`scripts/verify_test_fidelity.py` runs in CI (`.github/workflows/lint.yml`) pinned +to `vercel/chat@4.26.0` (matches the `UPSTREAM_PARITY` constant in +`src/chat_sdk/__init__.py`). **CI runs `--strict`** — the repo ships at 0 +missing *for mapped core files* as of `0.4.26.2` and the baseline +(`scripts/fidelity_baseline.json`) is empty. Scope is defined by the +`MAPPING` dict in the script: 8 of 17 `packages/chat/src/*.test.ts` files +today (extending to the remaining 9 is tracked as a follow-up). Unmapped +files are not checked — tightening scope requires editing `MAPPING` and +re-running `--strict`. + +Infra guardrails: + +- The workflow's `Clone upstream vercel/chat at pinned parity tag` step does + **not** use `continue-on-error` — a failed clone aborts the job loudly. +- The script itself fails with exit 1 if any mapped TS file is missing under + `TS_ROOT` (defense in depth against silent skips). + +Workflows: + +| Goal | Command | +|------|---------| +| Port a missing test | Write the Python test and land it; CI rejects anything that re-introduces a gap | +| Add a Python-only divergence (intentional skip) | Document in [Known Non-Parity](#known-non-parity-with-typescript-sdk), then `--update-baseline` and switch the workflow back to non-strict default for that file if truly unavoidable | +| Upstream sync | After pulling new upstream, run `--strict` — newly-added TS tests appear as missing and CI fails until ported | +| Final parity check | Same as CI: `TS_ROOT=/tmp/vercel-chat uv run python scripts/verify_test_fidelity.py --strict` | + +Baseline mode (the default without `--strict`) is retained for local +development where a few ports land in flight. Regenerate the baseline via +`--update-baseline` rather than hand-editing. + ## Divergence Policy Every divergence from upstream has a cost: merge conflicts on future syncs, diff --git a/pyproject.toml b/pyproject.toml index 5f04908..9b19b53 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,6 +2,20 @@ name = "chat-sdk" version = "0.4.26.2" description = "Multi-platform async chat SDK for Python — port of Vercel Chat" +keywords = [ + "chat", + "chatbot", + "chatops", + "slack-bot", + "discord-bot", + "telegram-bot", + "teams-bot", + "whatsapp-bot", + "bot-framework", + "async", + "asyncio", + "vercel", +] readme = "README.md" license = {text = "MIT"} requires-python = ">=3.10" @@ -16,7 +30,11 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Topic :: Communications", "Topic :: Communications :: Chat", + "Topic :: Internet", + "Topic :: Software Development :: Libraries :: Application Frameworks", + "Topic :: Software Development :: Libraries :: Python Modules", "Typing :: Typed", ] diff --git a/scripts/fidelity_baseline.json b/scripts/fidelity_baseline.json new file mode 100644 index 0000000..d9cfc42 --- /dev/null +++ b/scripts/fidelity_baseline.json @@ -0,0 +1,7 @@ +{ + "_comment": "Ratchet-down baseline for scripts/verify_test_fidelity.py. This repo ships at strict fidelity for mapped core files (0 missing) against chat@4.26.0, so the baseline is empty. Scope: the MAPPING dict in scripts/verify_test_fidelity.py is the authoritative list of TS files checked; it currently covers 8 of the 17 packages/chat/src/*.test.ts files. Default CI mode runs --strict via .github/workflows/lint.yml; this file is retained for local workflows that want to opt back into baseline mode (e.g. during an upstream sync where several ports land in flight). To baseline genuinely-divergent tests, run scripts/verify_test_fidelity.py --update-baseline after documenting the divergence in docs/UPSTREAM_SYNC.md.", + "ts_parity": "chat@4.26.0", + "total_ts_tests": 588, + "total_missing": 0, + "missing": {} +} diff --git a/scripts/verify_test_fidelity.py b/scripts/verify_test_fidelity.py index 6bb81b7..63f8acc 100644 --- a/scripts/verify_test_fidelity.py +++ b/scripts/verify_test_fidelity.py @@ -6,11 +6,28 @@ Python translation. Usage: - python scripts/verify_test_fidelity.py [--fix] - -With --fix: appends stub test functions for any missing translations. + python scripts/verify_test_fidelity.py --strict # CI path: fail on any missing + python scripts/verify_test_fidelity.py # baseline mode (local opt-in) + python scripts/verify_test_fidelity.py --fix # append stubs for missing + python scripts/verify_test_fidelity.py --update-baseline # rewrite baseline + +``--strict`` is the current CI contract (see ``.github/workflows/lint.yml``): +the baseline is ignored and any missing translation — or a missing upstream +checkout — fails the build. This repo ships at strict fidelity for mapped +core files (0 missing) against ``chat@4.26.0``. The ``MAPPING`` dict below +is the authoritative scope list; it currently covers 8 of the 17 +``packages/chat/src/*.test.ts`` files (extending it is tracked as a +follow-up). + +Baseline mode (the default without ``--strict``) is retained for local +workflows where a few ports land in flight: it succeeds iff the set of +missing tests is a subset of ``scripts/fidelity_baseline.json``. Tests that +are in the baseline but now pass are reported as fixed; new misses outside +the baseline fail. Regenerate via ``--update-baseline`` after documenting +intentional divergence in ``docs/UPSTREAM_SYNC.md``. """ +import json import os import re import sys @@ -18,6 +35,7 @@ TS_ROOT = os.environ.get("TS_ROOT", "/tmp/vercel-chat") PY_ROOT = os.environ.get("PY_ROOT", str(Path(__file__).parent.parent)) +BASELINE_PATH = Path(__file__).parent / "fidelity_baseline.json" # Mapping: TS test file -> Python test file MAPPING = { @@ -205,21 +223,148 @@ def count_absorbers(py_path: str) -> int: return count +def _current_parity_tag() -> str | None: + """Return the baseline-format parity tag (``chat@X.Y.Z``) for the current repo. + + Reads ``UPSTREAM_PARITY`` from ``src/chat_sdk/__init__.py`` without + importing the package (avoids pulling optional runtime deps during a + script run). Returns None if the constant can't be located. + """ + init_path = Path(__file__).parent.parent / "src" / "chat_sdk" / "__init__.py" + if not init_path.exists(): + return None + with open(init_path) as f: + content = f.read() + m = re.search(r'^UPSTREAM_PARITY\s*=\s*"([^"]+)"', content, re.MULTILINE) + if not m: + return None + return f"chat@{m.group(1)}" + + +def load_baseline(path: Path) -> dict[str, set[tuple[str, str]]]: + """Load fidelity baseline. Missing file returns empty baseline. + + Exits with code 1 when the baseline's ``ts_parity`` disagrees with the + current ``UPSTREAM_PARITY`` constant — a stale baseline could otherwise + silently mask upstream drift after a version bump. + """ + if not path.exists(): + return {} + with open(path) as f: + data = json.load(f) + baseline_parity = data.get("ts_parity") + current_parity = _current_parity_tag() + if baseline_parity and current_parity and baseline_parity != current_parity: + print( + f"\nbaseline parity mismatch: {path.name} was generated for " + f"upstream {baseline_parity}, but current parity is " + f"{current_parity} — re-run with `--update-baseline` after " + f"confirming the diff.", + file=sys.stderr, + ) + sys.exit(1) + out: dict[str, set[tuple[str, str]]] = {} + for ts_rel, entries in data.get("missing", {}).items(): + out[ts_rel] = {(e[0], e[1]) for e in entries} + return out + + +_DEFAULT_BASELINE_COMMENT = ( + "Ratchet-down baseline for scripts/verify_test_fidelity.py. This " + "repo ships at strict fidelity for mapped core files (0 missing) " + "against the current UPSTREAM_PARITY tag, so the baseline is " + "normally empty. Scope: the MAPPING dict in " + "scripts/verify_test_fidelity.py is the authoritative list of TS " + "files checked; it currently covers 8 of the 17 " + "packages/chat/src/*.test.ts files. Default CI mode runs --strict " + "via .github/workflows/lint.yml; this file is retained for local " + "workflows that want to opt back into baseline mode (e.g. during " + "an upstream sync where several ports land in flight). To " + "baseline genuinely-divergent tests, run " + "scripts/verify_test_fidelity.py --update-baseline after " + "documenting the divergence in docs/UPSTREAM_SYNC.md." +) + + +def write_baseline(path: Path, all_missing: dict[str, list], total_ts: int) -> None: + """Persist the current set of missing tests as the new baseline. + + If ``path`` already exists and has a ``_comment`` field, that curated + comment is preserved so hand-written context (e.g. scope qualifiers, + shipping-posture notes) isn't silently overwritten on every + ``--update-baseline`` run. Only fresh files get the default boilerplate. + """ + existing_comment: str | None = None + if path.exists(): + try: + with open(path) as f: + existing = json.load(f) + if isinstance(existing.get("_comment"), str): + existing_comment = existing["_comment"] + except (OSError, json.JSONDecodeError): + existing_comment = None + + # Derive ts_parity from UPSTREAM_PARITY so a fresh regen after an + # upstream version bump doesn't self-trap on a stale literal. Fall + # back to the last-known literal only if UPSTREAM_PARITY can't be + # read (e.g. __init__.py missing during an in-flight refactor). + current_parity = _current_parity_tag() + payload = { + "_comment": existing_comment if existing_comment is not None else _DEFAULT_BASELINE_COMMENT, + "ts_parity": current_parity if current_parity is not None else "chat@4.26.0", + "total_ts_tests": total_ts, + "total_missing": sum(len(v) for v in all_missing.values()), + "missing": { + ts_rel: [[d, t] for d, t, _p in sorted(entries, key=lambda e: (e[0], e[1]))] + for ts_rel, entries in sorted(all_missing.items()) + if entries + }, + } + with open(path, "w") as f: + json.dump(payload, f, indent=2, sort_keys=False) + f.write("\n") + + def main() -> int: fix_mode = "--fix" in sys.argv + strict_mode = "--strict" in sys.argv + update_baseline = "--update-baseline" in sys.argv + + if strict_mode and update_baseline: + print( + "error: --strict and --update-baseline are mutually exclusive.\n" + " --strict says 'no missing allowed'; --update-baseline says " + "'snapshot whatever is missing into the allowlist'. Pick one.", + file=sys.stderr, + ) + return 2 + + baseline = {} if (strict_mode or update_baseline) else load_baseline(BASELINE_PATH) + total_missing = 0 total_matched = 0 total_ts = 0 total_absorbers = 0 + all_missing: dict[str, list] = {} + new_misses: dict[str, list[tuple[str, str]]] = {} + fixed: dict[str, list[tuple[str, str]]] = {} + missing_ts_files: list[str] = [] print("=" * 70) print("TEST FIDELITY REPORT") + if strict_mode: + print(" mode: --strict (baseline ignored)") + elif update_baseline: + print(" mode: --update-baseline (rewriting baseline)") + else: + print(f" mode: baseline ({BASELINE_PATH.name})") print("=" * 70) for ts_rel, py_rel in MAPPING.items(): ts_path = os.path.join(TS_ROOT, ts_rel) if not os.path.exists(ts_path): - print(f"\n{ts_rel} — SKIPPED (file not found)") + print(f"\n{ts_rel} — MISSING (upstream TS file not found at {ts_path})") + missing_ts_files.append(ts_path) continue ts_tests = extract_ts_tests(ts_path) @@ -231,6 +376,16 @@ def main() -> int: total_matched += matched total_missing += len(missing) total_absorbers += absorbers + all_missing[ts_rel] = missing + + current_missing_keys = {(d, t) for d, t, _p in missing} + baseline_keys = baseline.get(ts_rel, set()) + file_new = sorted(current_missing_keys - baseline_keys) + file_fixed = sorted(baseline_keys - current_missing_keys) + if file_new: + new_misses[ts_rel] = file_new + if file_fixed: + fixed[ts_rel] = file_fixed absorber_note = f" ({absorbers} absorbers)" if absorbers else "" status = "OK" if not missing else f"GAPS ({len(missing)})" @@ -243,7 +398,8 @@ def main() -> int: if missing: for describe, ts_name, _py_name in missing[:5]: - print(f" MISSING: [{describe}] {ts_name}") + marker = "NEW" if (describe, ts_name) in set(file_new) else "baselined" + print(f" MISSING ({marker}): [{describe}] {ts_name}") if len(missing) > 5: print(f" ... and {len(missing) - 5} more") @@ -272,10 +428,66 @@ def main() -> int: else: print(f"TOTAL: {total_matched}/{total_ts} matched ({pct}%), {total_missing} missing") - if total_missing > 0: - print("\nRun with --fix to generate stubs for missing tests.") + # Infra guard: if any mapped TS file is missing, we cannot verify fidelity. + # Do NOT treat this as success — a failed upstream clone would otherwise + # silently pass CI. Fail loudly before any downstream success branches. + if missing_ts_files: + print( + f"\nupstream checkout missing — cannot verify fidelity. " + f"{len(missing_ts_files)} mapped TS file(s) not found under TS_ROOT={TS_ROOT!r}:" + ) + for path in missing_ts_files: + print(f" - {path}") + print( + "\nClone the upstream repo at the pinned parity tag, e.g.:\n" + " git clone --depth 1 --branch chat@4.26.0 " + "https://github.com/vercel/chat.git /tmp/vercel-chat\n" + "then re-run with TS_ROOT=/tmp/vercel-chat." + ) return 1 - print("\nAll TS tests have Python equivalents.") + + if update_baseline: + write_baseline(BASELINE_PATH, all_missing, total_ts) + print(f"\nBaseline written to {BASELINE_PATH}") + print(f" {total_missing} missing tests baselined across {sum(1 for v in all_missing.values() if v)} files") + return 0 + + if total_missing == 0: + print("\nAll TS tests have Python equivalents.") + if any(baseline.values()): + print("Baseline is stale — run with --update-baseline to clear it.") + return 0 + + if strict_mode: + print(f"\n{total_missing} missing (strict mode — baseline ignored).") + print("Run with --fix to generate stubs for missing tests.") + return 1 + + if new_misses: + new_count = sum(len(v) for v in new_misses.values()) + print(f"\n{new_count} NEW miss(es) outside the baseline:") + for ts_rel, entries in new_misses.items(): + for describe, ts_name in entries: + print(f" - {ts_rel} :: [{describe}] {ts_name}") + print("\nOptions:") + print(" 1. Port the missing TS test(s) to the matching Python file") + print(" 2. If intentional divergence, document in docs/UPSTREAM_SYNC.md") + print(" and re-baseline with --update-baseline") + print("\nRun with --fix to generate Python stubs for missing tests.") + return 1 + + if fixed: + fixed_count = sum(len(v) for v in fixed.values()) + print(f"\n✓ {fixed_count} test(s) fixed since baseline (no longer missing):") + for _ts_rel, entries in fixed.items(): + for describe, ts_name in entries[:5]: + print(f" - [{describe}] {ts_name}") + if len(entries) > 5: + print(f" ... and {len(entries) - 5} more") + print("\nRun with --update-baseline to tighten the baseline.") + + baseline_total = sum(len(v) for v in baseline.values()) + print(f"\n{total_missing}/{baseline_total} baseline miss(es) still present — no new drift.") return 0 diff --git a/src/chat_sdk/adapters/teams/format_converter.py b/src/chat_sdk/adapters/teams/format_converter.py index a96199b..23596ea 100644 --- a/src/chat_sdk/adapters/teams/format_converter.py +++ b/src/chat_sdk/adapters/teams/format_converter.py @@ -22,11 +22,7 @@ get_node_value, parse_markdown, ) - - -def _escape_table_cell(value: str) -> str: - """Escape pipe characters in table cells for GFM rendering.""" - return value.replace("\\", "\\\\").replace("|", "\\|").replace("\n", " ") +from chat_sdk.shared.card_utils import escape_table_cell class TeamsFormatConverter(BaseFormatConverter): @@ -194,11 +190,11 @@ def _table_to_gfm(self, node: Content) -> str: lines: list[str] = [] # Header row - lines.append(f"| {' | '.join(_escape_table_cell(c) for c in rows[0])} |") + lines.append(f"| {' | '.join(escape_table_cell(c) for c in rows[0])} |") # Separator separators = ["---"] * len(rows[0]) lines.append(f"| {' | '.join(separators)} |") # Data rows for row in rows[1:]: - lines.append(f"| {' | '.join(_escape_table_cell(c) for c in row)} |") + lines.append(f"| {' | '.join(escape_table_cell(c) for c in row)} |") return "\n".join(lines) diff --git a/tests/test_cards.py b/tests/test_cards.py index fe4d278..2ba7c82 100644 --- a/tests/test_cards.py +++ b/tests/test_cards.py @@ -8,6 +8,7 @@ is_card_element, table_element_to_ascii, ) +from chat_sdk.shared.card_utils import escape_table_cell, render_gfm_table class TestIsCardElement: @@ -159,3 +160,51 @@ def test_unknown_element(self): def test_button_element_returns_none(self): child = {"type": "button", "label": "Click me"} assert card_child_to_fallback_text(child) is None + + +class TestEscapeTableCell: + """Tests for shared.card_utils.escape_table_cell.""" + + def test_plain_text_passthrough(self): + assert escape_table_cell("hello world") == "hello world" + + def test_pipe_escaped(self): + assert escape_table_cell("a|b") == r"a\|b" + + def test_backslash_doubled_before_pipe_escape(self): + # Backslash must be doubled FIRST so that a literal `\|` in input + # doesn't collide with the subsequent pipe-escape. + assert escape_table_cell(r"a\b") == r"a\\b" + assert escape_table_cell(r"a\|b") == r"a\\\|b" + + def test_newline_collapsed_to_space(self): + assert escape_table_cell("line1\nline2") == "line1 line2" + + def test_multiple_substitutions(self): + assert escape_table_cell("a|b\nc\\d") == r"a\|b c\\d" + + def test_empty_string(self): + assert escape_table_cell("") == "" + + +class TestRenderGfmTable: + """Tests for shared.card_utils.render_gfm_table.""" + + def test_basic_table(self): + lines = render_gfm_table(["h1", "h2"], [["a", "b"], ["c", "d"]]) + assert lines == [ + "| h1 | h2 |", + "| --- | --- |", + "| a | b |", + "| c | d |", + ] + + def test_cells_are_escaped(self): + lines = render_gfm_table(["col"], [["pipe|inside"], ["has\nnewline"]]) + assert r"pipe\|inside" in lines[2] + assert "has newline" in lines[3] + + def test_empty_rows(self): + # No data rows — only header + separator. + lines = render_gfm_table(["only"], []) + assert lines == ["| only |", "| --- |"]