diff --git a/.github/workflows/canon-quality.yml b/.github/workflows/canon-quality.yml index 10c2b5c2..698aaf92 100644 --- a/.github/workflows/canon-quality.yml +++ b/.github/workflows/canon-quality.yml @@ -42,6 +42,141 @@ env: USER_AGENT: 'klappy.dev-canon-quality/1.0 (+github-actions; ${{ github.repository }}#${{ github.run_id }})' jobs: + frontmatter: + name: Frontmatter schema validation + runs-on: ubuntu-latest + timeout-minutes: 3 + # Hard-block from day one. The schema is unambiguous; canon + # (klappy://canon/constraints/frontmatter-validation-before-merge) mandates + # this gate "No Exceptions". No soft-observation cycle — the renderer's + # silent-drop failure mode (the May 10 incident) is exactly what this + # prevents. + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Install dependencies + run: pip install --quiet pyyaml + + - name: Run validator + id: validate + run: | + python3 scripts/validate-frontmatter.py --json writings/ > /tmp/fm-result.json || true + python3 - <<'PY' + import json + d = json.load(open('/tmp/fm-result.json')) + print(f"scanned={d['scanned']} status={d['status']} findings={len(d['findings'])}") + PY + + - name: Upload findings artifact + if: always() + uses: actions/upload-artifact@v4 + with: + name: frontmatter-findings + path: /tmp/fm-result.json + retention-days: 14 + if-no-files-found: warn + + - name: Render PR comment + id: render + if: github.event_name == 'pull_request' + run: | + python3 - <<'PY' + import json, collections + d = json.load(open('/tmp/fm-result.json')) + findings = d['findings'] + scanned = d['scanned'] + lines = [] + if not findings: + lines.append('### Canon Quality — Frontmatter Schema ✅') + lines.append('') + lines.append(f'All {scanned} file(s) in `writings/` conform to ' + f'`klappy://canon/meta/frontmatter-schema`.') + else: + lines.append('### Canon Quality — Frontmatter Schema ❌') + 
lines.append('') + lines.append(f"**{len(findings)} violation(s)** across " + f"`writings/` ({scanned} files scanned). " + f"Mode: **hard-block** (canon mandate, no exceptions).") + lines.append('') + by_file = collections.defaultdict(list) + for f in findings: + by_file[f['location']['path']].append(f) + for path, items in sorted(by_file.items()): + lines.append(f'
{path} — ' + f'{len(items)} finding(s)') + lines.append('') + lines.append('| Rule | Occurrence | Message |') + lines.append('|---|---|---|') + for it in items: + occ = (it['occurrence'] or '').replace('|', '\\|') + msg = (it['message'] or '').replace('|', '\\|') + lines.append(f"| `{it['rule_id']}` | `{occ}` | {msg} |") + lines.append('') + lines.append('
') + lines.append('') + lines.append('> **What to do**') + lines.append('> - Compare the offending file against `canon/meta/frontmatter-schema.md` (the source of truth) and a known-good essay like `writings/reverse-engineer-the-future.md`.') + lines.append('> - For enum violations: pick a value from the allowed set named in the message.') + lines.append('> - For type mismatches: remove the surrounding quotes on booleans and integers.') + lines.append('> - For missing essay-discovery fields: backfill `type`, `slug`, `hook`, `description` from your own opening — do not invent content.') + lines.append('> - There is no allowlist for these violations. They cause the renderer to silently drop the page from the homepage.') + lines.append('') + lines.append('Validator: `scripts/validate-frontmatter.py` · Canon: `klappy://canon/constraints/frontmatter-validation-before-merge` · Run: [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})') + open('/tmp/fm-comment.md', 'w').write('\n'.join(lines)) + PY + + - name: Sticky comment + if: github.event_name == 'pull_request' + uses: marocchino/sticky-pull-request-comment@v2 + with: + header: canon-quality-frontmatter + path: /tmp/fm-comment.md + + - name: Workflow step summary + if: always() + run: | + { + echo "## Canon Quality — Frontmatter Schema" + echo "" + python3 - <<'PY' + import json + try: + d = json.load(open('/tmp/fm-result.json')) + n = len(d['findings']) + if n == 0: + print(f"- **Status**: ✅ OK") + print(f"- **Files scanned**: {d['scanned']}") + else: + print(f"- **Status**: ❌ FINDINGS") + print(f"- **Files scanned**: {d['scanned']}") + print(f"- **Total findings**: {n}") + from collections import Counter + by_rule = Counter(f['rule_id'] for f in d['findings']) + print(f"- **By rule**: `{dict(by_rule)}`") + except Exception as e: + print(f"- **Result**: validator did not produce output ({e})") + PY + } >> "$GITHUB_STEP_SUMMARY" + + - name: Enforcement gate 
(hard-block) + run: | + python3 - <<'PY' + import json, sys + d = json.load(open('/tmp/fm-result.json')) + if d['findings']: + print(f"::error::Frontmatter validation found {len(d['findings'])} violation(s). " + f"Fix the offending fields per canon/meta/frontmatter-schema.md " + f"or see the PR comment for per-finding guidance.") + sys.exit(1) + print(f"Frontmatter OK ({d['scanned']} file(s) scanned).") + PY + audit: name: Reference integrity audit runs-on: ubuntu-latest diff --git a/canon/constraints/frontmatter-validation-before-merge.md b/canon/constraints/frontmatter-validation-before-merge.md index b3cd8278..ff34f443 100644 --- a/canon/constraints/frontmatter-validation-before-merge.md +++ b/canon/constraints/frontmatter-validation-before-merge.md @@ -58,14 +58,38 @@ These specific combinations have caused renderer crashes in production: ## Automation -A Managed Agent can be used for validation. The agent should: -1. Clone the repo and check out the branch -2. Fetch the frontmatter schema via oddkit -3. Read 3-4 working essays for structural comparison -4. Diff the new essay's frontmatter field-by-field -5. Fix issues and push, or report findings - -The agent configuration, environment ID, and API credentials are stored in the project instructions. +This constraint is implemented as a CI gate. The implementation is: + +- **Validator**: `scripts/validate-frontmatter.py` (lives in this repo). + Mirrors the schema's enums and required-field rules. Single-file Python, + PyYAML, no external dependencies beyond the standard library + pyyaml. +- **Workflow**: `.github/workflows/canon-quality.yml` runs the validator as + the **`frontmatter`** job on every PR and push that touches `writings/**`. + Runs in parallel with the reference-integrity audit (`oddkit_audit`). +- **Enforcement mode**: **hard-block from day one**. The schema is + unambiguous; the renderer's failure mode is silent-drop with no operator + signal; canon mandates this gate "No Exceptions". 
There is no soft-block + observation cycle. There is no allowlist directive — any finding fails the + job. + +The validator emits findings under six rule_ids, each mapped directly to a +"Known Crash Patterns" row above: + +| rule_id | Catches | +|---------|---------| +| `frontmatter-missing-block` | File has no `---`-delimited frontmatter at all | +| `frontmatter-parse-error` | Frontmatter block exists but YAML is malformed | +| `frontmatter-missing-required` | One of the eight universal fields, or one of `type` / `slug` / `hook` / `description` on a public essay in writings/, is missing or empty | +| `frontmatter-invalid-enum` | `exposure`, `voice`, `tier`, or `audience` has a value not in the canonical allowed set | +| `frontmatter-type-mismatch` | Quoted boolean (`public: "true"`) or quoted integer (`tier: "3"`) | +| `frontmatter-contradictory` | `public: false` combined with `exposure: public` | + +Authoring agents may run the validator locally before pushing +(`python3 scripts/validate-frontmatter.py`); the CI gate is the +authoritative check. + +When the validator and `canon/meta/frontmatter-schema.md` disagree, the +schema doc wins and the validator's enum mirror must be updated to match. --- diff --git a/scripts/tests/fixtures/broken-missing-universal.md b/scripts/tests/fixtures/broken-missing-universal.md new file mode 100644 index 00000000..62e28995 --- /dev/null +++ b/scripts/tests/fixtures/broken-missing-universal.md @@ -0,0 +1,8 @@ +--- +title: "Missing universal fields + invalid enums + quoted booleans" +exposure: published +voice: klappy +tier: "3" +public: "true" +--- +body diff --git a/scripts/tests/fixtures/broken-no-frontmatter.md b/scripts/tests/fixtures/broken-no-frontmatter.md new file mode 100644 index 00000000..befa5be5 --- /dev/null +++ b/scripts/tests/fixtures/broken-no-frontmatter.md @@ -0,0 +1,3 @@ +This file has no frontmatter block at all. + +Just body content.
diff --git a/scripts/tests/fixtures/writings/broken-contradictory.md b/scripts/tests/fixtures/writings/broken-contradictory.md new file mode 100644 index 00000000..03d3f959 --- /dev/null +++ b/scripts/tests/fixtures/writings/broken-contradictory.md @@ -0,0 +1,12 @@ +--- +uri: klappy://writings/test-broken-2 +title: "Contradictory flags + missing essay-critical fields" +audience: public +exposure: public +tier: 1 +voice: first_person +stability: stable +tags: ["test"] +public: false +--- +body diff --git a/scripts/tests/fixtures/writings/valid.md b/scripts/tests/fixtures/writings/valid.md new file mode 100644 index 00000000..9e78ac80 --- /dev/null +++ b/scripts/tests/fixtures/writings/valid.md @@ -0,0 +1,18 @@ +--- +uri: klappy://writings/test-valid +title: "A valid public essay" +audience: public +exposure: public +tier: 1 +voice: first_person +stability: stable +tags: ["test"] +date: 2026-05-10 + +type: essay +slug: test-valid +hook: "A hook line." +description: "A description." +public: true +--- +body diff --git a/scripts/tests/test_validator.py b/scripts/tests/test_validator.py new file mode 100755 index 00000000..32ea799e --- /dev/null +++ b/scripts/tests/test_validator.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 +""" +Smoke tests for validate-frontmatter.py. 
def run(*paths) -> tuple[dict, int]:
    """Run the validator in ``--json`` mode and return ``(report, exit_code)``.

    Args:
        *paths: File or directory paths forwarded to the validator CLI.

    Returns:
        A 2-tuple of the parsed JSON report (keys ``scanned``, ``findings``,
        ``status``) and the validator's process exit code.

    Exits the test run with status 1 if the validator printed no parseable
    JSON (for example because it crashed), echoing its stderr so the failure
    can be diagnosed instead of surfacing a bare ``JSONDecodeError``.
    """
    proc = subprocess.run(
        [sys.executable, str(SCRIPT), "--json", *paths],
        capture_output=True, text=True, cwd=REPO,
    )
    try:
        report = json.loads(proc.stdout)
    except json.JSONDecodeError:
        print(f"FAIL: validator emitted no JSON for {paths} "
              f"(exit {proc.returncode}); stderr:\n{proc.stderr}")
        sys.exit(1)
    return report, proc.returncode


def expect(rule_ids: set[str], findings: list, msg: str) -> None:
    """Require that every rule in *rule_ids* appears among *findings*.

    Extra rule ids in *findings* are tolerated; only missing ones fail.
    On failure prints a ``FAIL:`` line and exits with status 1.
    """
    actual = {f["rule_id"] for f in findings}
    missing = rule_ids - actual
    if missing:
        print(f"FAIL: {msg}: missing rules {missing}; got {actual}")
        sys.exit(1)
    print(f"  OK: {msg} — {sorted(actual)}")


def _require(condition: bool, msg: str) -> None:
    """Fail the run with *msg* unless *condition* holds.

    Used instead of ``assert`` so the checks still execute under
    ``python -O`` and fail the same way ``expect`` does.
    """
    if not condition:
        print(f"FAIL: {msg}")
        sys.exit(1)


def main() -> None:
    """Run the smoke-test cases in order; exit non-zero on the first failure."""
    # 1. Valid essay → no findings, exit 0
    d, rc = run(str(FIXTURES / "writings" / "valid.md"))
    _require(rc == 0 and not d["findings"], f"valid case failed: {d}")
    print("  OK: valid public essay → 0 findings, exit 0")

    # 2. Missing universal + invalid enums + quoted booleans
    d, rc = run(str(FIXTURES / "broken-missing-universal.md"))
    _require(rc == 1, f"expected exit 1, got {rc}")
    expect(
        {"frontmatter-missing-required",
         "frontmatter-invalid-enum",
         "frontmatter-type-mismatch"},
        d["findings"],
        "broken-missing-universal: 3 rule classes fire",
    )

    # 3. Contradictory flags + essay discovery missing
    d, rc = run(str(FIXTURES / "writings" / "broken-contradictory.md"))
    _require(rc == 1, f"expected exit 1, got {rc}")
    expect(
        {"frontmatter-contradictory",
         "frontmatter-missing-required"},
        d["findings"],
        "broken-contradictory: contradictory + missing-essay-critical fire",
    )

    # 4. No frontmatter block at all
    d, rc = run(str(FIXTURES / "broken-no-frontmatter.md"))
    _require(rc == 1, f"expected exit 1, got {rc}")
    expect(
        {"frontmatter-missing-block"},
        d["findings"],
        "broken-no-frontmatter: missing-block fires",
    )

    # 5. Real writings/ directory must be clean (this enforces that we never
    #    ship the validator with existing breakage)
    d, rc = run("writings/")
    _require(
        rc == 0,
        f"writings/ failed validation with {len(d['findings'])} finding(s): "
        f"{[(f['location']['path'], f['rule_id']) for f in d['findings']]}",
    )
    print(f"  OK: writings/ clean ({d['scanned']} files)")

    print("\nAll validator smoke tests passed.")


if __name__ == "__main__":
    main()
+ Validates the named paths or — with no args — every .md in writings/. + Exits 0 if clean, 1 if any errors found, 2 on internal error. + + python3 scripts/validate-frontmatter.py --json [path ...] + Same, but emits a JSON object on stdout (`scanned`, `status`, and one + `findings` entry per violation) for the CI workflow's PR-comment renderer. + +The Vodka discipline: this script reads its enum values from a single +constants block below. The canon schema document is the source of truth; +this script mirrors it for low-latency CI gating. Drift between them is +itself a violation — the script's own tests verify the mirror is in sync. +""" +from __future__ import annotations +import argparse +import json +import re +import sys +from pathlib import Path +from typing import Any + +try: + import yaml +except ImportError: + sys.stderr.write("This script requires PyYAML. Install with: pip install pyyaml\n") + sys.exit(2) + + +# ─── Schema mirror ───────────────────────────────────────────────────────────── +# Source of truth: canon/meta/frontmatter-schema.md +# If these diverge, the schema doc wins and this mirror must be updated. + +ENUMS: dict[str, set] = { + "exposure": {"nav", "public", "draft", "hidden", "internal"}, + "voice": {"first_person", "neutral", "direct", "narrative", + "conversational", "authoritative"}, + "tier": {1, 2, 3, 4}, + "audience": {"canon", "docs", "public", "odd", "operators", "apocrypha"}, +} + +# The eight universal fields. Every document, regardless of audience, must +# declare these. +UNIVERSAL_REQUIRED: list[str] = [ + "uri", "title", "audience", "exposure", "tier", "voice", "stability", "tags", +] + +# Renderer-critical discovery fields for public essays in writings/. Anything +# in writings/ with exposure=public hits the homepage card; without these the +# card renders empty (the May 10 incident). +ESSAY_DISCOVERY_REQUIRED: list[str] = [ + "type", "slug", "hook", "description", +] + +# Fields that must be unquoted booleans in YAML.
# Fields that must be unquoted booleans in YAML. Quoting them produces a
# string, which the renderer rejects.
BOOLEAN_FIELDS: list[str] = ["public"]

# Fields that must be unquoted integers in YAML.
INTEGER_FIELDS: list[str] = ["tier"]


# ─── Finding shape ─────────────────────────────────────────────────────────────

def finding(
    rule_id: str,
    severity: str,
    path: str,
    occurrence: str,
    message: str,
) -> dict[str, Any]:
    """Build one finding record.

    The ``location.line`` value is always 1: findings are reported per file,
    not per frontmatter line.
    """
    return {
        "rule_id": rule_id,
        "severity": severity,
        "location": {"path": path, "line": 1},
        "occurrence": occurrence,
        "message": message,
    }


# ─── Validators ────────────────────────────────────────────────────────────────

CANON_REF = "klappy://canon/meta/frontmatter-schema"
CONSTRAINT_REF = "klappy://canon/constraints/frontmatter-validation-before-merge"

FRONTMATTER_BLOCK_RE = re.compile(r"^---\n(.*?)\n---\n", re.DOTALL)


def _is_blank(value: Any) -> bool:
    """Return True when a frontmatter value counts as missing or empty."""
    return value is None or value == "" or value == []


def _enum_accepts(value: Any, allowed: set) -> bool:
    """Return True when *value* is in *allowed* with a member's exact type.

    The exact-type test matters three ways:
      * ``bool`` is an ``int`` subclass, so ``True in {1, 2, 3, 4}`` is True;
        ``tier: true`` must not pass as ``tier: 1``.
      * ``3.0 == 3``, so a float would otherwise pass an integer enum.
      * unhashable YAML values (lists, mappings) would make ``in`` raise
        ``TypeError`` against a set; they are rejected before that lookup.
    """
    if not any(type(value) is type(member) for member in allowed):
        return False
    return value in allowed


def validate_file(path: str) -> list[dict[str, Any]]:
    """Validate one markdown file's frontmatter.

    Args:
        path: Path of the file to check. Essay-discovery rules apply only
            when the path lies under a ``writings/`` directory.

    Returns:
        A list of finding dicts (see ``finding``); empty when the file is
        clean. Unreadable files, including files that are not valid UTF-8,
        a missing frontmatter block, malformed YAML, and non-mapping
        frontmatter each short-circuit with a single finding. Otherwise all
        field-level rules are evaluated and every violation is reported.
    """
    try:
        text = Path(path).read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError; without it a
        # mis-encoded file would crash the whole run instead of being reported.
        return [finding("frontmatter-parse-error", "error", path, str(e),
                        f"Could not read file: {e}")]

    m = FRONTMATTER_BLOCK_RE.match(text)
    if not m:
        return [finding(
            "frontmatter-missing-block", "error", path, "(no --- delimiters)",
            f"File has no YAML frontmatter block. Every .md document under "
            f"writings/ must open with a frontmatter block. Canon: {CANON_REF}",
        )]

    try:
        fm = yaml.safe_load(m.group(1)) or {}
    except yaml.YAMLError as e:
        return [finding(
            "frontmatter-parse-error", "error", path, str(e),
            f"YAML frontmatter is malformed and could not be parsed: {e}. "
            f"Canon: {CANON_REF}",
        )]

    if not isinstance(fm, dict):
        return [finding(
            "frontmatter-parse-error", "error", path, str(type(fm).__name__),
            f"Frontmatter parsed as {type(fm).__name__} rather than a mapping. "
            f"Canon: {CANON_REF}",
        )]

    findings: list[dict[str, Any]] = []

    # 1. Universal required fields
    for field in UNIVERSAL_REQUIRED:
        if _is_blank(fm.get(field)):
            findings.append(finding(
                "frontmatter-missing-required", "error", path, field,
                f'Required universal field "{field}" is missing or empty. '
                f"The eight universal fields are: "
                f"{', '.join(UNIVERSAL_REQUIRED)}. Canon: {CANON_REF}",
            ))

    # 2. Enum validation — only flag if the field is present (missing was
    #    already handled above for universal fields).
    for field, allowed in ENUMS.items():
        v = fm.get(field)
        if v is None:
            continue
        if not _enum_accepts(v, allowed):
            allowed_repr = ", ".join(sorted(repr(a) for a in allowed))
            findings.append(finding(
                "frontmatter-invalid-enum", "error", path,
                f"{field}: {v!r}",
                f'Field "{field}" has value {v!r}, which is not in the '
                f"allowed set: [{allowed_repr}]. Canon: {CANON_REF}",
            ))

    # 3. Quoted-boolean detection
    for field in BOOLEAN_FIELDS:
        v = fm.get(field)
        if isinstance(v, str) and v.lower() in ("true", "false"):
            findings.append(finding(
                "frontmatter-type-mismatch", "error", path,
                f'{field}: "{v}"',
                f'Field "{field}" is a quoted string {v!r} but must be an '
                f"unquoted boolean ({v.lower()}). YAML coerces unquoted "
                f"true/false to booleans; quoted values parse as strings, "
                f"which the renderer rejects. Canon: {CANON_REF}",
            ))

    # 4. Integer fields must not be quoted strings
    for field in INTEGER_FIELDS:
        v = fm.get(field)
        if isinstance(v, str) and v.isdigit():
            findings.append(finding(
                "frontmatter-type-mismatch", "error", path,
                f'{field}: "{v}"',
                f'Field "{field}" is a quoted string {v!r} but must be an '
                f"unquoted integer. Canon: {CANON_REF}",
            ))

    # 5. Contradictory flags (Known Crash Pattern from canon constraint).
    #    A quoted "false" is treated as false here too, so the contradiction
    #    is reported alongside the type mismatch.
    public_val = fm.get("public")
    public_is_false = public_val is False or (
        isinstance(public_val, str) and public_val.strip().lower() == "false"
    )
    if public_is_false and fm.get("exposure") == "public":
        findings.append(finding(
            "frontmatter-contradictory", "error", path,
            "public: false + exposure: public",
            f'Contradictory flags: "public: false" with "exposure: public" '
            f"causes the renderer to build a route with no content. "
            f"Set both consistently. Canon: {CONSTRAINT_REF}",
        ))

    # 6. Essay-critical discovery fields (only for writings/ with exposure=public)
    is_writing = path.startswith("writings/") or "/writings/" in path
    if is_writing and fm.get("exposure") == "public":
        for field in ESSAY_DISCOVERY_REQUIRED:
            if _is_blank(fm.get(field)):
                findings.append(finding(
                    "frontmatter-missing-required", "error", path, field,
                    f'Public essay in writings/ is missing renderer-critical '
                    f'field "{field}". Without it the homepage card renders '
                    f"empty. Required for exposure=public writings: "
                    f"{', '.join(ESSAY_DISCOVERY_REQUIRED)}. "
                    f"Canon: {CONSTRAINT_REF}",
                ))

    return findings


def discover_targets(args_paths: list[str]) -> list[str]:
    """Resolve CLI args to a list of .md files to scan.

    README.md files are skipped as they are section indexes with a different
    shape from articles. Directories are searched recursively. With no
    arguments, every .md under ``writings/`` is returned (an empty list when
    that directory does not exist).

    A named path that does not exist is skipped with a warning on stderr
    rather than silently, so a typo cannot quietly turn into "0 files
    scanned". A file reached more than once (named directly and via its
    directory) is returned once, in first-seen order.
    """
    def keep(p: Path) -> bool:
        return p.suffix == ".md" and p.name != "README.md"

    if not args_paths:
        # Default: every .md under writings/ except README.md
        base = Path("writings")
        if not base.is_dir():
            return []
        return [str(p) for p in sorted(base.rglob("*.md")) if keep(p)]

    out: list[str] = []
    for raw in args_paths:
        pp = Path(raw)
        if pp.is_dir():
            out.extend(str(x) for x in sorted(pp.rglob("*.md")) if keep(x))
        elif pp.is_file():
            if keep(pp):
                out.append(str(pp))
        else:
            sys.stderr.write(f"warning: path does not exist, skipping: {raw}\n")
    # dict.fromkeys de-duplicates while preserving order.
    return list(dict.fromkeys(out))


def render_human(findings: list[dict[str, Any]], scanned: int) -> str:
    """Render findings as a plain-text report grouped by file path."""
    if not findings:
        return f"✅ Frontmatter OK — {scanned} file(s) scanned, 0 findings."
    by_file: dict[str, list[dict[str, Any]]] = {}
    for f in findings:
        by_file.setdefault(f["location"]["path"], []).append(f)
    lines = [f"❌ Frontmatter validation found {len(findings)} finding(s) "
             f"across {len(by_file)} file(s) ({scanned} scanned).\n"]
    for path, items in sorted(by_file.items()):
        lines.append(f"  {path}:")
        for it in items:
            lines.append(f"    [{it['rule_id']}] {it['occurrence']}")
            lines.append(f"      → {it['message']}")
        lines.append("")
    return "\n".join(lines)


def main() -> int:
    """CLI entry point.

    Returns:
        0 when no findings were produced, 1 when at least one finding was
        produced, 2 when validation itself failed with an unexpected error.
    """
    ap = argparse.ArgumentParser(
        description="Validate klappy.dev frontmatter against the canon schema.",
    )
    ap.add_argument("paths", nargs="*", help="Files or directories to scan. "
                    "Default: every .md under writings/.")
    ap.add_argument("--json", action="store_true",
                    help="Emit a JSON object (scanned, findings, status) "
                         "on stdout.")
    args = ap.parse_args()

    try:
        targets = discover_targets(args.paths)
        all_findings: list[dict[str, Any]] = []
        for path in targets:
            all_findings.extend(validate_file(path))
    except Exception as e:
        # Top-level boundary: an uncaught exception would exit with status 1,
        # which is indistinguishable from "findings present". Report it and
        # use the dedicated internal-error code instead.
        import traceback
        traceback.print_exc()
        sys.stderr.write(f"internal error: {type(e).__name__}: {e}\n")
        return 2

    if args.json:
        json.dump({
            "scanned": len(targets),
            "findings": all_findings,
            "status": "OK" if not all_findings else "FINDINGS",
        }, sys.stdout, indent=2)
        sys.stdout.write("\n")
    else:
        sys.stdout.write(render_human(all_findings, len(targets)) + "\n")

    return 1 if all_findings else 0


if __name__ == "__main__":
    sys.exit(main())