From 209807d9a35ffb678dae4d2bd6feff553c70afae Mon Sep 17 00:00:00 2001 From: pavan Date: Thu, 19 Feb 2026 21:45:18 +0530 Subject: [PATCH 1/7] Add generic codexw PR-grade wrapper and profile-driven review flow --- .pre-commit-hooks.yaml | 51 + README.md | 146 ++ codexw | 2402 +++++++++++++++++++++++++++++ local-review-profile.example.yaml | 76 + 4 files changed, 2675 insertions(+) create mode 100755 codexw create mode 100644 local-review-profile.example.yaml diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 999e6ba..0e209bf 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -17,6 +17,33 @@ files: &sync_ai_rules_files (^\.cursor/rules/.*\.mdc$|^\.code_review/.*\.md$) pass_filenames: false +- id: codex-review + name: Codex AI Code Review + description: On-demand AI code review using OpenAI Codex CLI. Requires codex CLI installed and authenticated. + entry: codex review + language: system + pass_filenames: false + stages: [manual] + verbose: true + +- id: codex-review-pr-grade + name: Codex AI Code Review (PR-grade) + description: Profile-aware multi-pass Codex review via ./codexw (auto-generates and auto-syncs local-review-profile.yaml). + entry: ./codexw review + language: script + pass_filenames: false + stages: [manual] + verbose: true + +- id: codexw + name: Codex AI Code Review (PR-grade, codexw) + description: "Alias for codex-review-pr-grade. Run with: pre-commit run codexw" + entry: ./codexw review + language: script + pass_filenames: false + stages: [manual] + verbose: true + # Nobody should ever use these hooks in production. They're just for testing PRs in # the duolingo/pre-commit-hooks repo more easily without having to tag and push # temporary images to Docker Hub. 
Usage: edit a consumer repo's hook config to @@ -34,3 +61,27 @@ language: docker files: *sync_ai_rules_files pass_filenames: false + +- id: codex-review-dev + name: Codex AI Code Review (dev) + entry: codex review + language: system + pass_filenames: false + stages: [manual] + verbose: true + +- id: codex-review-pr-grade-dev + name: Codex AI Code Review (PR-grade, dev) + entry: ./codexw review + language: script + pass_filenames: false + stages: [manual] + verbose: true + +- id: codexw-dev + name: Codex AI Code Review (PR-grade, codexw dev) + entry: ./codexw review + language: script + pass_filenames: false + stages: [manual] + verbose: true diff --git a/README.md b/README.md index 1615d81..78b4849 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,148 @@ This hook synchronizes AI coding rules from `.cursor/rules/` and `.code_review/` This ensures all AI coding assistants stay aware of the same rules and coding conventions. +## Codex AI Code Review Hook (`codex-review`) + +On-demand AI code review using the OpenAI Codex CLI. This hook runs in `manual` stage by default, meaning it won't block normal commits. + +**Prerequisites:** +- Install Codex CLI: `brew install codex` or `npm install -g @openai/codex` +- Authenticate: `codex auth login` (uses Duolingo ChatGPT org credentials) + +**Usage:** +```bash +# Run Codex review on staged changes +pre-commit run codex-review + +# Run on all files +pre-commit run codex-review --all-files +``` + +For direct CLI usage without pre-commit: +```bash +codex review --uncommitted +codex review --base master +``` + +## Codex PR-grade Hook (`codexw`) + +Profile-aware multi-pass local review using `codexw`. This hook is also `manual` by default and does not block normal commits. + +It runs detailed PR-grade review from `local-review-profile.yaml`. +`codexw` also includes compatibility fallback for Codex CLI versions that reject prompt+target combinations. 
+Canonical command is `codexw review`; `codexw review-pr` is kept as a compatibility alias. +If profile is missing, `codexw` auto-generates `local-review-profile.yaml` on first run. +On each run, `codexw` auto-syncs profile entries derived from repository signals (rules/domains/domain prompts) while preserving manual overrides. Stale auto-managed entries are pruned when source-of-truth changes. + +PR-grade outputs now include: +- deterministic rule-coverage accounting (`rule-coverage-accounting.json`) +- machine-readable findings (`findings.json`, `findings.sarif`) +- waiver + baseline filtering (for strict gate on net-new active findings) +- optional GitHub publish adapter (`--publish-github`, optional inline comments) + +**Prerequisites:** +- Install Codex CLI: `brew install codex` or `npm install -g @openai/codex` +- Authenticate: `codex auth login` +- Optional: pre-seed `local-review-profile.yaml` in target repo root (see example below) + +**Usage:** +```bash +# Run PR-grade review for current diff vs profile default base branch +pre-commit run codexw + +# Run PR-grade review for all files (still uses profile + pass orchestration) +pre-commit run codexw --all-files +``` + +Direct execution (without pre-commit): +```bash +./codexw review +./codexw review --base main +./codexw review --domains core,testing --no-fail-on-findings +./codexw review --full-repo --max-files-per-shard 50 --parallel-shards 2 +# Runtime budget controls +./codexw review --max-passes 8 --time-budget-minutes 12 +# Create missing profile and exit +./codexw review --bootstrap-only +# Sync profile from repository signals and exit +./codexw review --sync-profile-only +# Use waivers/baseline files and update baseline from current active findings +./codexw review --waiver-file .codex/review-waivers.yaml --baseline-file .codex/review-baseline.json +./codexw review --update-baseline --no-fail-on-findings +# Replace baseline instead of merge +./codexw review --update-baseline --replace-baseline 
--no-fail-on-findings +# Validate profile loading only (no Codex run) +./codexw review --print-effective-profile +# Disable profile sync for one run +./codexw review --no-sync-profile +# Keep stale auto-managed profile entries for this run +./codexw review --no-prune-autogen +# Publish summary (and optional inline comments) to current PR +./codexw review --publish-github +./codexw review --publish-github --github-inline +# Note: inline comments are automatically skipped in --full-repo mode +``` + +`local-review-profile.yaml` schema (minimum practical shape): +```yaml +version: 1 + +repo: + name: Repo Name + +review: + default_base: main + strict_gate: true + depth_hotspots: 3 + output_root: .codex/review-runs + max_files_per_shard: 40 + max_shards: 5 + parallel_shards: 1 + max_passes: 0 # 0 = unlimited + time_budget_minutes: 0 # 0 = unlimited + waiver_file: .codex/review-waivers.yaml + baseline_file: .codex/review-baseline.json + +rules: + include: + - AGENTS.md + - .cursor/rules/**/*.mdc + +domains: + default: [core] + allowed: [core, testing] + +prompts: + global: | + Additional repo-wide review context. + by_domain: + testing: | + Additional testing-specific context. + +pipeline: + include_policy_pass: true + include_core_passes: true + include_domain_passes: true + include_depth_passes: true + policy_instructions: | + Custom policy pass instructions. + core_passes: + - id: core-breadth + name: Core breadth + shard: changed_files # none | changed_files + instructions: | + Custom breadth pass instructions. 
+ depth_instructions: | + Task: + - Perform depth-first review of hotspot file: {hotspot} +``` + +Reference profile: +`local-review-profile.example.yaml` + +Backward-compatible hook id alias is available: +`codex-review-pr-grade` + ## Usage Repo maintainers can declare these hooks in `.pre-commit-config.yaml`: @@ -68,6 +210,10 @@ Repo maintainers can declare these hooks in `.pre-commit-config.yaml`: - --scala-version=3 # Defaults to Scala 2.12 # Sync AI rules hook (for repos with Cursor AI rules) - id: sync-ai-rules + # On-demand Codex AI code review (manual stage, requires codex CLI) + - id: codex-review + # On-demand PR-grade Codex review (manual stage, profile-aware) + - id: codexw ``` Directories named `build` and `node_modules` are excluded by default - no need to declare them in the hook's `exclude` key. diff --git a/codexw b/codexw new file mode 100755 index 0000000..8cc8a50 --- /dev/null +++ b/codexw @@ -0,0 +1,2402 @@ +#!/usr/bin/env python3 +"""Generic Codex PR-grade review wrapper (profile-aware).""" + +from __future__ import annotations + +import argparse +import datetime as dt +import fnmatch +import glob +import hashlib +import json +import os +import re +import shlex +import subprocess +import sys +import time +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path +from typing import Any + + +NO_FINDINGS_SENTINEL = "No actionable findings." 
+ +DOMAIN_PROMPT_TEMPLATES: dict[str, str] = { + "experiments": ( + "Focus areas:\n" + "- experiment overtreatment risk and incorrect gating\n" + "- control/treatment behavior leaks and inverted conditions\n" + "- stale experiment branches and incomplete cleanup\n" + "- observeTreatmentRecord/getConditionAndTreat usage correctness\n" + "- missing test coverage for control/treatment behavior" + ), + "compose": ( + "Focus areas:\n" + "- required Compose conventions from rule files\n" + "- prohibited patterns (Text usage, theming violations, forbidden components)\n" + "- misuse of shared design-compose/common-compose primitives\n" + "- state/recomposition/lifecycle mistakes in composables" + ), + "coroutines": ( + "Focus areas:\n" + "- dispatcher injection and hardcoded dispatcher violations\n" + "- cancellation handling and structured concurrency issues\n" + "- viewModelScope/lifecycle flow collection correctness\n" + "- RxJava/Coroutines interop and migration pattern violations\n" + "- coroutine test pattern correctness (runTest/TestScope/dispatchers)" + ), + "testing": ( + "Focus areas:\n" + "- unit testing conventions and AAA structure violations\n" + "- required fake-vs-mock usage pattern violations\n" + "- FakeUsersRepository and User.emptyUser().copy migration correctness\n" + "- weak assertions and missing verification for side effects" + ), +} + +DEFAULT_GLOBAL_PROMPT = ( + "Use repository standards for lifecycle, state, architecture boundaries, and " + "production-safety. Prioritize behavior-changing issues and policy violations " + "over style-only comments." 
+) + +DEFAULT_POLICY_PASS_INSTRUCTIONS = ( + "Task:\n" + "- Enforce every standard file listed above.\n" + "- Output a 'Rule Coverage' section with one line per rule file:\n" + " :: Covered | NotApplicable :: short reason\n" + "- Then output actionable findings using the required schema.\n" + f"- If no actionable findings exist, include exactly this line: {NO_FINDINGS_SENTINEL}" +) + +DEFAULT_CORE_PASS_SPECS: list[dict[str, str]] = [ + { + "id": "core-breadth", + "name": "Core 1: breadth coverage across all changed files", + "instructions": ( + "Task:\n" + "- Perform full-breadth review across every changed file listed above.\n" + "- Output a 'Breadth Coverage' section with one line per changed file:\n" + " :: Reviewed | NotApplicable :: short reason\n" + "- Then output actionable findings using the required schema.\n" + f"- If no actionable findings exist, include exactly this line: {NO_FINDINGS_SENTINEL}" + ), + "shard": "changed_files", + }, + { + "id": "core-regressions", + "name": "Core 2: regressions/security/crash scan", + "instructions": ( + "Focus areas:\n" + "- behavioral regressions\n" + "- crash/nullability risks\n" + "- state corruption and data-loss risks\n" + "- security and privacy issues" + ), + "shard": "none", + }, + { + "id": "core-architecture", + "name": "Core 3: architecture/concurrency scan", + "instructions": ( + "Focus areas:\n" + "- architecture boundaries and dependency misuse\n" + "- lifecycle and concurrency/threading issues\n" + "- error-handling/fallback correctness\n" + "- protocol/contract boundary failures" + ), + "shard": "none", + }, + { + "id": "core-tests", + "name": "Core 4: test-coverage scan", + "instructions": ( + "Focus areas:\n" + "- missing tests required to protect the change\n" + "- high-risk edge cases without coverage\n" + "- regressions likely to escape without tests" + ), + "shard": "none", + }, +] + +DEFAULT_DEPTH_PASS_INSTRUCTIONS = ( + "Task:\n" + "- Perform depth-first review of hotspot file: {hotspot}\n" + "- 
DEFAULT_WAIVER_FILE = ".codex/review-waivers.yaml"
DEFAULT_BASELINE_FILE = ".codex/review-baseline.json"


def die(message: str, code: int = 1) -> None:
    """Report a fatal error and terminate.

    Writes ``error: <message>`` to stderr, then raises ``SystemExit(code)``.
    This function never returns normally.
    """
    print(f"error: {message}", file=sys.stderr)
    raise SystemExit(code)


def run_checked(cmd: list[str], cwd: Path) -> str:
    """Run *cmd* inside *cwd* and return its captured stdout.

    Any failure is fatal: a non-zero exit status, or an executable that
    cannot be started at all, is reported through ``die`` (``SystemExit``).
    """
    rendered = " ".join(shlex.quote(part) for part in cmd)
    try:
        proc = subprocess.run(
            cmd,
            cwd=str(cwd),
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except subprocess.CalledProcessError as exc:
        details = (exc.stderr or "").strip() or (exc.stdout or "").strip() or "command failed"
        die(f"{rendered} :: {details}")
    except OSError as exc:
        # Missing / non-executable binary: report it like any other command
        # failure instead of leaking a raw traceback.
        die(f"{rendered} :: {exc}")
    return proc.stdout


def run_streaming(cmd: list[str], cwd: Path, out_file: Path) -> int:
    """Run *cmd*, echoing merged stdout/stderr line by line and teeing it to *out_file*.

    Returns the process exit code.
    """
    # Popen is used as a context manager so the pipe is closed and the child
    # reaped even when printing or writing raises midway.
    with out_file.open("w", encoding="utf-8") as fh, subprocess.Popen(
        cmd,
        cwd=str(cwd),
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    ) as proc:
        assert proc.stdout is not None
        for line in proc.stdout:
            print(line, end="")
            fh.write(line)
        return proc.wait()


def run_captured(cmd: list[str], cwd: Path, out_file: Path, *, stream_output: bool) -> int:
    """Run *cmd* to completion, store merged stdout/stderr in *out_file*, return the exit code.

    When *stream_output* is true the captured text is also printed once the
    command has finished. If the executable cannot be started, the OS error
    text is written to *out_file* and 127 is returned.
    """
    try:
        proc = subprocess.run(
            cmd,
            cwd=str(cwd),
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
    except OSError as exc:
        # Same convention shells use for "command not found".
        output, exit_code = f"{exc}\n", 127
    else:
        output, exit_code = proc.stdout or "", proc.returncode
    out_file.write_text(output, encoding="utf-8")
    if stream_output and output:
        print(output, end="")
    return exit_code


def run_review_pass_with_compat(
    repo_root: Path,
    out_file: Path,
    target_args: list[str],
    target_desc: str,
    prompt: str,
    pass_name: str,
    *,
    stream_output: bool = True,
) -> None:
    """Run one pass, with fallback for Codex CLI versions that reject prompt+target flags.

    The primary attempt is ``codex review <target_args> <prompt>``. If that
    fails and the captured output contains the "cannot be used with
    '[PROMPT]'" message, the pass is retried without target flags, with
    *target_desc* folded into the prompt text instead. Any remaining failure
    is fatal via ``die``.
    """
    primary_cmd = ["codex", "review", *target_args, prompt]
    exit_code = run_captured(primary_cmd, repo_root, out_file, stream_output=stream_output)
    if exit_code == 0:
        return

    content = out_file.read_text(encoding="utf-8", errors="replace")
    prompt_target_incompat = "cannot be used with '[PROMPT]'" in content
    if prompt_target_incompat and target_args:
        print(
            "warning: codex CLI rejected prompt+target flags; "
            f"retrying pass '{pass_name}' in prompt-only compatibility mode.",
            file=sys.stderr,
        )
        compat_prefix = (
            "Target selection requested for this pass:\n"
            f"- {target_desc}\n"
            "Apply review findings to the requested target using the repository context below."
        )
        compat_cmd = ["codex", "review", f"{compat_prefix}\n\n{prompt}"]
        exit_code = run_captured(compat_cmd, repo_root, out_file, stream_output=stream_output)
        if exit_code == 0:
            return

    die(f"codex review failed in pass '{pass_name}' with exit code {exit_code}")
def find_repo_root(start: Path) -> Path:
    """Return the git top-level directory that contains *start*.

    Falls back to *start* itself when git is unavailable, *start* is not in
    a work tree, or git prints nothing. The probe is silent: nothing is
    written to stderr when the fallback is taken.
    """
    try:
        proc = subprocess.run(
            ["git", "rev-parse", "--show-toplevel"],
            cwd=str(start),
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
        )
    except OSError:
        return start
    top = proc.stdout.strip() if proc.returncode == 0 else ""
    return Path(top) if top else start


def git_ref_exists(repo_root: Path, ref: str) -> bool:
    """Return True when the fully-qualified *ref* exists in the repository at *repo_root*.

    A git executable that cannot be started counts as "ref does not exist".
    """
    try:
        proc = subprocess.run(
            ["git", "show-ref", "--verify", "--quiet", ref],
            cwd=str(repo_root),
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            text=True,
        )
    except OSError:
        return False
    return proc.returncode == 0


def detect_default_base(repo_root: Path) -> str:
    """Pick the base branch name: ``master`` or ``main``, whichever exists first.

    Local branch refs are preferred over ``origin`` remote-tracking refs, and
    ``master`` is probed before ``main`` within each group. Returns ``"main"``
    when none of the four refs exists.
    """
    for prefix in ("refs/heads", "refs/remotes/origin"):
        for candidate in ("master", "main"):
            if git_ref_exists(repo_root, f"{prefix}/{candidate}"):
                return candidate
    return "main"
# Display spellings for repo-name tokens that plain ``str.capitalize`` gets wrong.
_REPO_NAME_SPECIAL_TOKENS: dict[str, str] = {
    "ios": "iOS",
    "android": "Android",
    "api": "API",
    "sdk": "SDK",
    "ml": "ML",
    "ai": "AI",
    "ui": "UI",
}


def infer_repo_name(repo_root: Path) -> str:
    """Derive a human-readable repository name from the directory name.

    The directory name is split on runs of ``-`` / ``_``; each token is
    capitalized, except for a few acronyms that get a fixed spelling.
    Returns ``"Repository"`` for an empty name, and the raw name when it
    consists only of separators.
    """
    raw = repo_root.name.strip()
    if not raw:
        return "Repository"

    tokens = [t for t in re.split(r"[-_]+", raw) if t]
    if not tokens:
        return raw

    return " ".join(
        _REPO_NAME_SPECIAL_TOKENS.get(token.lower(), token.capitalize()) for token in tokens
    )


def infer_rule_patterns(repo_root: Path) -> list[str]:
    """Return rule-file glob patterns for the rule locations present under *repo_root*.

    When none of the known locations exists, a default pair of patterns is
    returned so the profile always has something to match against.
    """
    probes = (
        ("AGENTS.md", Path.is_file, "AGENTS.md"),
        (".cursor/rules", Path.is_dir, ".cursor/rules/**/*.mdc"),
        (".code_review", Path.is_dir, ".code_review/**/*.md"),
    )
    found = [pattern for rel, exists, pattern in probes if exists(repo_root / rel)]
    return found or ["AGENTS.md", ".cursor/rules/**/*.mdc"]


def parse_yaml_mapping_fragment(raw: str) -> dict[str, Any]:
    """Parse a small YAML mapping (e.g. rule-file frontmatter) into a dict.

    PyYAML is used when importable; invalid YAML or a non-mapping document
    yields ``{}``. Without PyYAML a minimal line-based parser handles flat
    ``key: value`` pairs, inline ``[a, b]`` lists, ``- item`` block lists and
    ``true``/``false`` scalars; everything else is kept as a string.
    """
    text = raw.strip()
    if not text:
        return {}

    try:
        import yaml  # type: ignore

        data = yaml.safe_load(text)
        return data if isinstance(data, dict) else {}
    except ModuleNotFoundError:
        pass
    except Exception:
        # Deliberately best-effort: malformed frontmatter means "no metadata".
        return {}

    # Lightweight fallback parser for simple key/value YAML.
    parsed: dict[str, Any] = {}
    current_key: str | None = None
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        if line.startswith("-") and current_key:
            current = parsed.get(current_key)
            if not isinstance(current, list):
                current = []
            # Strip surrounding quotes so block-list items match inline-list
            # items and what PyYAML would return.
            current.append(line[1:].strip().strip("'\""))
            parsed[current_key] = current
            continue
        if ":" not in line:
            continue
        key, value = line.split(":", 1)
        k = key.strip()
        v = value.strip()
        current_key = k
        if not v:
            # Bare "key:" — assume a block list follows.
            parsed[k] = []
            continue
        if v.startswith("[") and v.endswith("]"):
            inner = v[1:-1].strip()
            parsed[k] = [item.strip().strip("'\"") for item in inner.split(",") if item.strip()]
            continue
        lowered = v.lower()
        if lowered in {"true", "false"}:
            parsed[k] = lowered == "true"
        else:
            parsed[k] = v.strip("'\"")
    return parsed
+ parsed: dict[str, Any] = {} + current_key: str | None = None + for raw_line in text.splitlines(): + line = raw_line.strip() + if not line or line.startswith("#"): + continue + if line.startswith("-") and current_key: + current = parsed.get(current_key) + if not isinstance(current, list): + current = [] + current.append(line[1:].strip()) + parsed[current_key] = current + continue + if ":" not in line: + continue + key, value = line.split(":", 1) + k = key.strip() + v = value.strip() + current_key = k + if not v: + parsed[k] = [] + continue + if v.startswith("[") and v.endswith("]"): + inner = v[1:-1].strip() + parsed[k] = [item.strip().strip("'\"") for item in inner.split(",") if item.strip()] + continue + lowered = v.lower() + if lowered in {"true", "false"}: + parsed[k] = lowered == "true" + else: + parsed[k] = v.strip("'\"") + return parsed + + +def parse_frontmatter(path: Path) -> dict[str, Any]: + try: + text = path.read_text(encoding="utf-8", errors="replace") + except OSError: + return {} + + if not text.startswith("---"): + return {} + + match = re.match(r"^---\s*\n(.*?)\n---\s*(?:\n|$)", text, flags=re.DOTALL) + if not match: + return {} + return parse_yaml_mapping_fragment(match.group(1)) + + +def _domain_hints_from_text(text: str) -> list[str]: + lowered = text.lower() + out: list[str] = [] + hints: list[tuple[str, tuple[str, ...]]] = [ + ("experiments", ("experiment", "treatment", "abtest", "feature-flag")), + ("compose", ("compose", "composable", "design-compose")), + ("coroutines", ("coroutine", "flow", "rxjava", "dispatcher")), + ("testing", ("testing", "test", "fake", "mock", "assert", "junit")), + ] + for domain, needles in hints: + if any(needle in lowered for needle in needles): + out.append(domain) + return out + + +def _to_boolish(value: Any) -> bool | None: + if isinstance(value, bool): + return value + if isinstance(value, str): + lowered = value.strip().lower() + if lowered in {"true", "1", "yes", "on"}: + return True + if lowered in 
def _extract_rule_globs(meta: dict[str, Any]) -> list[str]:
    """Collect file-scope globs from rule frontmatter.

    Several alternative key spellings are consulted in a fixed order. Entries
    meaning "everything" (``all files``, ``all``, ``*``) are dropped, and the
    result is de-duplicated preserving first occurrence.
    """
    match_everything = {"all files", "all", "*"}
    collected: list[str] = []
    for key in ("file_scope", "fileScope", "scope", "scopes", "globs", "files", "include"):
        for raw_item in to_string_list(meta.get(key), []):
            item = raw_item.strip()
            if item and item.lower() not in match_everything:
                collected.append(item)
    return _unique(collected)


def _extract_rule_domains(meta: dict[str, Any], rel_path: str) -> list[str]:
    """Determine the review domains a rule file belongs to.

    Explicit frontmatter keys win; known aliases are folded onto canonical
    domain names and unknown values pass through normalized (lower-case,
    spaces replaced by hyphens). When nothing explicit is present, domains
    are guessed from the path and the ``description`` text.
    """
    aliases = {
        "experiment": "experiments",
        "experiments": "experiments",
        "compose": "compose",
        "ui-compose": "compose",
        "coroutines": "coroutines",
        "coroutine": "coroutines",
        "async": "coroutines",
        "rxjava": "coroutines",
        "testing": "testing",
        "tests": "testing",
        "test": "testing",
    }
    found: list[str] = []
    for key in ("domain", "domains", "tags", "category", "categories"):
        for item in to_string_list(meta.get(key), []):
            normalized = item.strip().lower().replace(" ", "-")
            if normalized:
                found.append(aliases.get(normalized, normalized))

    if not found:
        description = str(meta.get("description", "")).strip()
        found.extend(_domain_hints_from_text(f"{rel_path} {description}"))
    return _unique(found)


def discover_rule_metadata(repo_root: Path, patterns: list[str]) -> list[dict[str, Any]]:
    """Build one metadata row per rule file matched by *patterns*.

    Each row carries the repo-relative path, the always-apply flag (read from
    ``always_apply`` or, failing that, ``alwaysApply``), file globs, domains
    and the description taken from the file's frontmatter.
    """
    rows: list[dict[str, Any]] = []
    for rel in discover_rule_files(repo_root, patterns):
        meta = parse_frontmatter(repo_root / rel)
        flag = _to_boolish(meta.get("always_apply"))
        if flag is None:
            flag = _to_boolish(meta.get("alwaysApply"))
        rows.append(
            {
                "path": rel,
                # flag is True, False or None here; None counts as False.
                "always_apply": bool(flag),
                "file_globs": _extract_rule_globs(meta),
                "domains": _extract_rule_domains(meta, rel),
                "description": str(meta.get("description", "")).strip(),
            }
        )
    return rows
def infer_domains_from_rule_metadata(rule_metadata: list[dict[str, Any]]) -> list[str]:
    """Return every domain referenced by the rule metadata, always including ``core``.

    Well-known domains come first in a fixed order; repo-specific domains
    follow in alphabetical order.
    """
    found = {"core"}
    for row in rule_metadata:
        found.update(to_string_list(row.get("domains"), []))

    # Keep known domains first, then repo-specific domains.
    preferred = ["core", "experiments", "compose", "coroutines", "testing"]
    known_first = [name for name in preferred if name in found]
    extras = sorted(name for name in found if name not in preferred)
    return known_first + extras


def default_pipeline_config() -> dict[str, Any]:
    """Return a fresh copy of the default review pipeline configuration."""
    config: dict[str, Any] = {
        flag: True
        for flag in (
            "include_policy_pass",
            "include_core_passes",
            "include_domain_passes",
            "include_depth_passes",
        )
    }
    config["policy_instructions"] = DEFAULT_POLICY_PASS_INSTRUCTIONS
    # JSON round-trip = deep copy, so callers can mutate the pass list freely.
    config["core_passes"] = json.loads(json.dumps(DEFAULT_CORE_PASS_SPECS))
    config["depth_instructions"] = DEFAULT_DEPTH_PASS_INSTRUCTIONS
    return config


def build_bootstrap_profile(repo_root: Path) -> dict[str, Any]:
    """Infer a complete starter profile from what is present in *repo_root*.

    Rule patterns, domains and per-domain prompts are derived from the rule
    files found; every other setting gets its default value.
    """
    rule_patterns = infer_rule_patterns(repo_root)
    domains = infer_domains_from_rule_metadata(discover_rule_metadata(repo_root, rule_patterns))
    by_domain: dict[str, str] = {}
    for name in domains:
        if name != "core" and name in DOMAIN_PROMPT_TEMPLATES:
            by_domain[name] = DOMAIN_PROMPT_TEMPLATES[name]

    review_settings = {
        "default_base": detect_default_base(repo_root),
        "strict_gate": True,
        "depth_hotspots": 3,
        "output_root": ".codex/review-runs",
        "max_files_per_shard": 40,
        "max_shards": 5,
        "parallel_shards": 1,
        "max_passes": 0,
        "time_budget_minutes": 0,
        "waiver_file": DEFAULT_WAIVER_FILE,
        "baseline_file": DEFAULT_BASELINE_FILE,
    }
    return {
        "version": 1,
        "repo": {"name": infer_repo_name(repo_root)},
        "review": review_settings,
        "rules": {"include": rule_patterns},
        "domains": {"default": domains, "allowed": domains},
        "prompts": {
            "global": DEFAULT_GLOBAL_PROMPT,
            "by_domain": by_domain,
        },
        "pipeline": default_pipeline_config(),
    }
DEFAULT_BASELINE_FILE, + }, + "rules": {"include": rule_patterns}, + "domains": {"default": domains, "allowed": domains}, + "prompts": { + "global": DEFAULT_GLOBAL_PROMPT, + "by_domain": by_domain, + }, + "pipeline": default_pipeline_config(), + } + + +def write_profile(path: Path, profile: dict[str, Any]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + # Write JSON-compatible YAML to avoid hard dependency on PyYAML. + path.write_text(json.dumps(profile, indent=2) + "\n", encoding="utf-8") + + +def _stable(obj: Any) -> str: + return json.dumps(obj, sort_keys=True, separators=(",", ":")) + + +def _unique(values: list[str]) -> list[str]: + seen: set[str] = set() + out: list[str] = [] + for v in values: + s = str(v).strip() + if not s or s in seen: + continue + seen.add(s) + out.append(s) + return out + + +def _ensure_dict(parent: dict[str, Any], key: str) -> dict[str, Any]: + cur = parent.get(key) + if isinstance(cur, dict): + return cur + parent[key] = {} + return parent[key] + + +def sync_profile_with_repo( + raw_profile: dict[str, Any], + repo_root: Path, + *, + prune_autogen: bool, +) -> tuple[dict[str, Any], bool]: + """Merge repository-derived signals into profile while preserving manual overrides.""" + before = _stable(raw_profile) + profile: dict[str, Any] = json.loads(json.dumps(raw_profile)) + inferred = build_bootstrap_profile(repo_root) + + profile_meta = _ensure_dict(profile, "profile_meta") + autogen = _ensure_dict(profile_meta, "autogen") + prev_autogen_rules = to_string_list(autogen.get("rules_include"), []) + prev_autogen_domains = to_string_list(autogen.get("domains"), []) + prev_prompt_raw = autogen.get("prompt_by_domain") + prev_autogen_prompt_map: dict[str, str] = {} + if isinstance(prev_prompt_raw, dict): + for key, value in prev_prompt_raw.items(): + k = str(key).strip() + if not k: + continue + prev_autogen_prompt_map[k] = str(value) + + repo = _ensure_dict(profile, "repo") + if not str(repo.get("name", "")).strip(): + repo["name"] 
def sync_profile_with_repo(
    raw_profile: dict[str, Any],
    repo_root: Path,
    *,
    prune_autogen: bool,
) -> tuple[dict[str, Any], bool]:
    """Merge repository-derived signals into profile while preserving manual overrides.

    Works on a deep copy of *raw_profile*. Missing settings are filled in
    from a freshly inferred bootstrap profile; rule patterns, domains and
    per-domain prompts are merged with the inferred values. What was
    auto-generated is recorded under ``profile_meta.autogen`` so that, with
    *prune_autogen* enabled, entries that were auto-managed on a previous run
    and are no longer inferred can be removed without touching manual edits.

    Returns ``(profile, changed)`` where *changed* says whether the result
    differs from *raw_profile*.
    """
    before = _stable(raw_profile)
    profile: dict[str, Any] = json.loads(json.dumps(raw_profile))
    inferred = build_bootstrap_profile(repo_root)

    # Provenance recorded by the previous sync run (if any).
    profile_meta = _ensure_dict(profile, "profile_meta")
    autogen = _ensure_dict(profile_meta, "autogen")
    prev_autogen_rules = to_string_list(autogen.get("rules_include"), [])
    prev_autogen_domains = to_string_list(autogen.get("domains"), [])
    prev_prompt_raw = autogen.get("prompt_by_domain")
    prev_autogen_prompt_map: dict[str, str] = {}
    if isinstance(prev_prompt_raw, dict):
        for key, value in prev_prompt_raw.items():
            k = str(key).strip()
            if not k:
                continue
            prev_autogen_prompt_map[k] = str(value)

    repo = _ensure_dict(profile, "repo")
    if not str(repo.get("name", "")).strip():
        repo["name"] = inferred["repo"]["name"]

    # (key, fallback, blank_is_missing): string settings are refilled when
    # blank, the others only when the key is absent.
    review = _ensure_dict(profile, "review")
    review_defaults: tuple[tuple[str, Any, bool], ...] = (
        ("default_base", inferred["review"]["default_base"], True),
        ("strict_gate", True, False),
        ("depth_hotspots", 3, False),
        ("output_root", ".codex/review-runs", True),
        ("max_files_per_shard", 40, False),
        ("max_shards", 5, False),
        ("parallel_shards", 1, False),
        ("max_passes", 0, False),
        ("time_budget_minutes", 0, False),
        ("waiver_file", DEFAULT_WAIVER_FILE, True),
        ("baseline_file", DEFAULT_BASELINE_FILE, True),
    )
    for key, fallback, blank_is_missing in review_defaults:
        if blank_is_missing:
            if not str(review.get(key, "")).strip():
                review[key] = fallback
        elif key not in review:
            review[key] = fallback

    rules = _ensure_dict(profile, "rules")
    existing_patterns = to_string_list(rules.get("include"), [])
    inferred_patterns = to_string_list(inferred["rules"]["include"], [])
    if prune_autogen and prev_autogen_rules:
        # Drop everything that was auto-managed last time; whatever is still
        # valid comes back through inferred_patterns below.
        prev_rule_set = set(prev_autogen_rules)
        existing_patterns = [p for p in existing_patterns if p not in prev_rule_set]
    merged_patterns = _unique(existing_patterns + inferred_patterns)
    rules["include"] = merged_patterns

    domains = _ensure_dict(profile, "domains")
    existing_allowed = to_string_list(domains.get("allowed"), [])
    existing_default = to_string_list(domains.get("default"), [])
    inferred_domains = to_string_list(inferred["domains"]["default"], ["core"])
    if prune_autogen and prev_autogen_domains:
        prev_domain_set = set(prev_autogen_domains)
        existing_allowed = [d for d in existing_allowed if d not in prev_domain_set]
        existing_default = [d for d in existing_default if d not in prev_domain_set]
    merged_allowed = _unique(existing_allowed + inferred_domains)
    merged_default = _unique(existing_default + inferred_domains)

    # Default must be subset of allowed.
    allowed_set = set(merged_allowed)
    merged_default = [d for d in merged_default if d in allowed_set]
    if not merged_allowed:
        merged_allowed = ["core"]
    if not merged_default:
        merged_default = ["core"]

    domains["allowed"] = merged_allowed
    domains["default"] = merged_default

    prompts = _ensure_dict(profile, "prompts")
    if not str(prompts.get("global", "")).strip():
        prompts["global"] = inferred["prompts"]["global"]
    by_domain = prompts.get("by_domain")
    if not isinstance(by_domain, dict):
        by_domain = {}

    inferred_by_domain = inferred["prompts"]["by_domain"]
    new_autogen_prompt_map = dict(prev_autogen_prompt_map)
    for domain in merged_allowed:
        if domain not in inferred_by_domain:
            continue

        inferred_prompt = inferred_by_domain[domain]
        existing_prompt = str(by_domain.get(domain, "")).strip()
        prev_prompt = str(prev_autogen_prompt_map.get(domain, "")).strip()

        # Add missing template prompts; refresh only if this entry was auto-managed and unchanged.
        if not existing_prompt:
            by_domain[domain] = inferred_prompt
        elif prev_prompt and existing_prompt == prev_prompt and existing_prompt != inferred_prompt:
            by_domain[domain] = inferred_prompt

        new_autogen_prompt_map[domain] = inferred_prompt

    if prune_autogen:
        # Remove prompts for domains that are no longer inferred, but only
        # when the text is still exactly what was auto-generated before.
        for domain in list(by_domain.keys()):
            if domain in inferred_by_domain:
                continue
            prev_prompt = str(prev_autogen_prompt_map.get(domain, "")).strip()
            current_prompt = str(by_domain.get(domain, "")).strip()
            if prev_prompt and current_prompt == prev_prompt:
                del by_domain[domain]
                new_autogen_prompt_map.pop(domain, None)

    prompts["by_domain"] = by_domain

    pipeline = _ensure_dict(profile, "pipeline")
    inferred_pipeline = inferred.get("pipeline")
    if isinstance(inferred_pipeline, dict):
        for key, value in inferred_pipeline.items():
            if key not in pipeline:
                pipeline[key] = value
        existing_core_passes = pipeline.get("core_passes")
        if not isinstance(existing_core_passes, list) or not existing_core_passes:
            pipeline["core_passes"] = inferred_pipeline.get("core_passes", [])

    if "version" not in profile:
        profile["version"] = 1

    # Snapshot taken before provenance is rewritten: the sync timestamp is
    # only bumped when something other than the provenance moved.
    after_without_meta = _stable(profile)
    changed = before != after_without_meta

    if prune_autogen:
        autogen["rules_include"] = inferred_patterns
        autogen["domains"] = inferred_domains
        autogen["prompt_by_domain"] = {
            domain: prompt
            for domain, prompt in new_autogen_prompt_map.items()
            if domain in inferred_by_domain
        }
    else:
        # Preserve full auto-managed provenance when prune is disabled so future prune runs can still
        # remove stale entries safely.
        autogen["rules_include"] = _unique(prev_autogen_rules + inferred_patterns)
        autogen["domains"] = _unique(prev_autogen_domains + inferred_domains)
        preserved_prompt_map = dict(prev_autogen_prompt_map)
        for domain, prompt in inferred_by_domain.items():
            preserved_prompt_map[domain] = prompt
        autogen["prompt_by_domain"] = preserved_prompt_map

    meta = _ensure_dict(profile, "profile_meta")
    if changed:
        meta["managed_by"] = "codexw"
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # the rendered string is unchanged.
        meta["last_synced_utc"] = dt.datetime.now(dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        meta["sync_mode"] = "merge+prune" if prune_autogen else "merge"

    final_changed = before != _stable(profile)
    return profile, final_changed


def load_yaml_or_json(path: Path) -> dict[str, Any]:
    """Load the profile at *path* and return it as a mapping.

    PyYAML is used when importable; otherwise the file must be JSON. Invalid
    content, or a top-level value that is not a mapping, is fatal via ``die``.
    """
    text = path.read_text(encoding="utf-8")

    # Preferred: PyYAML
    try:
        import yaml  # type: ignore

        data = yaml.safe_load(text)
        if not isinstance(data, dict):
            die(f"profile at {path} must be a mapping/object")
        return data
    except ModuleNotFoundError:
        pass
    except Exception as exc:
        die(f"invalid YAML in {path}: {exc}")

    # Fallback: JSON (YAML is superset of JSON)
    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        die(
            "PyYAML not available and profile is not valid JSON-compatible YAML. "
            "Install PyYAML (python3 -m pip install pyyaml) or provide JSON syntax."
        )
    if not isinstance(data, dict):
        die(f"profile at {path} must be a mapping/object")
    return data


def load_yaml_or_json_any(path: Path) -> Any:
    """Load *path* as YAML (or JSON when PyYAML is missing) with no shape requirement.

    Invalid content is fatal via ``die``.
    """
    text = path.read_text(encoding="utf-8")

    try:
        import yaml  # type: ignore

        return yaml.safe_load(text)
    except ModuleNotFoundError:
        pass
    except Exception as exc:
        die(f"invalid YAML in {path}: {exc}")

    try:
        return json.loads(text)
    except json.JSONDecodeError:
        die(
            f"{path} is not valid JSON-compatible YAML. "
            "Install PyYAML (python3 -m pip install pyyaml) or provide JSON syntax."
        )
+ ) + + +def to_bool(value: Any, default: bool) -> bool: + if value is None: + return default + if isinstance(value, bool): + return value + if isinstance(value, str): + norm = value.strip().lower() + if norm in {"1", "true", "yes", "on"}: + return True + if norm in {"0", "false", "no", "off"}: + return False + return default + + +def to_int(value: Any, default: int) -> int: + if value is None: + return default + try: + parsed = int(value) + return parsed if parsed >= 0 else default + except (TypeError, ValueError): + return default + + +def to_string_list(value: Any, default: list[str] | None = None) -> list[str]: + if value is None: + return list(default or []) + if isinstance(value, list): + return [str(x).strip() for x in value if str(x).strip()] + if isinstance(value, str): + # Support comma-separated shorthand + return [x.strip() for x in value.split(",") if x.strip()] + return list(default or []) + + +def normalize_profile(raw: dict[str, Any]) -> dict[str, Any]: + repo = raw.get("repo") or {} + review = raw.get("review") or {} + rules = raw.get("rules") or {} + domains = raw.get("domains") or {} + prompts = raw.get("prompts") or {} + pipeline = raw.get("pipeline") or {} + + if not isinstance(repo, dict): + repo = {} + if not isinstance(review, dict): + review = {} + if not isinstance(rules, dict): + rules = {} + if not isinstance(domains, dict): + domains = {} + if not isinstance(prompts, dict): + prompts = {} + if not isinstance(pipeline, dict): + pipeline = {} + + allowed_domains = to_string_list(domains.get("allowed"), ["core"]) + default_domains = to_string_list(domains.get("default"), allowed_domains) + if not default_domains: + default_domains = list(allowed_domains) + if not allowed_domains: + allowed_domains = ["core"] + + domain_prompt_map = prompts.get("by_domain") + if not isinstance(domain_prompt_map, dict): + domain_prompt_map = {} + + default_pipeline = default_pipeline_config() + pipeline_core_raw = pipeline.get("core_passes") + if not 
isinstance(pipeline_core_raw, list) or not pipeline_core_raw: + pipeline_core_raw = default_pipeline["core_passes"] + + pipeline_core_passes: list[dict[str, str]] = [] + for idx, raw_pass in enumerate(pipeline_core_raw, start=1): + if not isinstance(raw_pass, dict): + continue + pass_id = str(raw_pass.get("id", f"core-pass-{idx}")).strip() or f"core-pass-{idx}" + pass_name = str(raw_pass.get("name", pass_id)).strip() or pass_id + instructions = str(raw_pass.get("instructions", "")).strip() + if not instructions: + continue + shard_mode = str(raw_pass.get("shard", "none")).strip().lower() + if shard_mode not in {"none", "changed_files"}: + shard_mode = "none" + pipeline_core_passes.append( + { + "id": pass_id, + "name": pass_name, + "instructions": instructions, + "shard": shard_mode, + } + ) + + if not pipeline_core_passes: + pipeline_core_passes = json.loads(json.dumps(default_pipeline["core_passes"])) + + normalized = { + "version": str(raw.get("version", "1")), + "repo_name": str(repo.get("name", "Repository")).strip() or "Repository", + "default_base": str(review.get("default_base", "main")).strip() or "main", + "strict_gate": to_bool(review.get("strict_gate"), True), + "depth_hotspots": to_int(review.get("depth_hotspots"), 3), + "output_root": str(review.get("output_root", ".codex/review-runs")).strip() + or ".codex/review-runs", + "max_files_per_shard": to_int(review.get("max_files_per_shard"), 40), + "max_shards": to_int(review.get("max_shards"), 5), + "parallel_shards": max(1, to_int(review.get("parallel_shards"), 1)), + "max_passes": to_int(review.get("max_passes"), 0), + "time_budget_minutes": to_int(review.get("time_budget_minutes"), 0), + "waiver_file": str(review.get("waiver_file", DEFAULT_WAIVER_FILE)).strip() + or DEFAULT_WAIVER_FILE, + "baseline_file": str(review.get("baseline_file", DEFAULT_BASELINE_FILE)).strip() + or DEFAULT_BASELINE_FILE, + "rule_patterns": to_string_list( + rules.get("include"), ["AGENTS.md", ".cursor/rules/**/*.mdc"] + ), + 
"default_domains": default_domains, + "allowed_domains": allowed_domains, + "global_prompt": str(prompts.get("global", "")).strip(), + "domain_prompts": { + str(k): str(v).strip() for k, v in domain_prompt_map.items() if str(v).strip() + }, + "pipeline": { + "include_policy_pass": to_bool( + pipeline.get("include_policy_pass"), + to_bool(default_pipeline.get("include_policy_pass"), True), + ), + "include_core_passes": to_bool( + pipeline.get("include_core_passes"), + to_bool(default_pipeline.get("include_core_passes"), True), + ), + "include_domain_passes": to_bool( + pipeline.get("include_domain_passes"), + to_bool(default_pipeline.get("include_domain_passes"), True), + ), + "include_depth_passes": to_bool( + pipeline.get("include_depth_passes"), + to_bool(default_pipeline.get("include_depth_passes"), True), + ), + "policy_instructions": str( + pipeline.get("policy_instructions", default_pipeline["policy_instructions"]) + ).strip() + or default_pipeline["policy_instructions"], + "core_passes": pipeline_core_passes, + "depth_instructions": str( + pipeline.get("depth_instructions", default_pipeline["depth_instructions"]) + ).strip() + or default_pipeline["depth_instructions"], + }, + } + return normalized + + +def discover_rule_files(repo_root: Path, patterns: list[str]) -> list[str]: + matches: set[str] = set() + for pattern in patterns: + expanded = glob.glob(str(repo_root / pattern), recursive=True) + for abs_path in expanded: + p = Path(abs_path) + if p.is_file(): + try: + rel = p.relative_to(repo_root) + except ValueError: + continue + matches.add(str(rel)) + return sorted(matches) + + +def is_git_repo(repo_root: Path) -> bool: + proc = subprocess.run( + ["git", "rev-parse", "--is-inside-work-tree"], + cwd=str(repo_root), + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + text=True, + ) + return proc.returncode == 0 + + +def collect_changed_files(repo_root: Path, mode: str, base: str, commit: str) -> list[str]: + if mode == "full_repo": + if 
is_git_repo(repo_root): + tracked = run_checked(["git", "ls-files"], repo_root) + others = run_checked(["git", "ls-files", "--others", "--exclude-standard"], repo_root) + return sorted( + { + line.strip() + for line in (tracked + "\n" + others).splitlines() + if line.strip() + } + ) + files: list[str] = [] + for abs_path in repo_root.rglob("*"): + if not abs_path.is_file(): + continue + try: + rel = abs_path.relative_to(repo_root) + except ValueError: + continue + if any(part.startswith(".git") for part in rel.parts): + continue + files.append(str(rel)) + return sorted(files) + + if mode == "base": + cmd = ["git", "diff", "--name-only", f"{base}...HEAD"] + out = run_checked(cmd, repo_root) + return sorted({line.strip() for line in out.splitlines() if line.strip()}) + if mode == "uncommitted": + out1 = run_checked(["git", "diff", "--name-only", "HEAD"], repo_root) + out2 = run_checked( + ["git", "ls-files", "--others", "--exclude-standard"], repo_root + ) + return sorted( + { + line.strip() + for line in (out1 + "\n" + out2).splitlines() + if line.strip() + } + ) + if mode == "commit": + out = run_checked(["git", "show", "--name-only", "--pretty=", commit], repo_root) + return sorted({line.strip() for line in out.splitlines() if line.strip()}) + die(f"unsupported mode: {mode}") + return [] + + +def collect_numstat( + repo_root: Path, + mode: str, + base: str, + commit: str, + *, + files_for_full_repo: list[str] | None = None, +) -> list[tuple[int, str]]: + if mode == "full_repo": + files = files_for_full_repo or collect_changed_files(repo_root, mode, base, commit) + rows: list[tuple[int, str]] = [] + for rel in files: + abs_path = repo_root / rel + try: + size = abs_path.stat().st_size + except OSError: + size = 0 + rows.append((size, rel)) + rows.sort(key=lambda x: x[0], reverse=True) + return rows + + if mode == "base": + cmd = ["git", "diff", "--numstat", f"{base}...HEAD"] + elif mode == "uncommitted": + cmd = ["git", "diff", "--numstat", "HEAD"] + elif mode == 
"commit": + cmd = ["git", "show", "--numstat", "--pretty=", commit] + else: + die(f"unsupported mode: {mode}") + return [] + + out = run_checked(cmd, repo_root) + rows: list[tuple[int, str]] = [] + for raw in out.splitlines(): + parts = raw.split("\t") + if len(parts) < 3: + continue + add_raw, del_raw, path = parts[0], parts[1], parts[2] + add = int(add_raw) if add_raw.isdigit() else 0 + rem = int(del_raw) if del_raw.isdigit() else 0 + rows.append((add + rem, path)) + rows.sort(key=lambda x: x[0], reverse=True) + return rows + + +def changed_modules(changed_files: list[str]) -> list[tuple[int, str]]: + counts: dict[str, int] = {} + for path in changed_files: + parts = path.split("/") + key = "/".join(parts[:2]) if len(parts) >= 2 else parts[0] + counts[key] = counts.get(key, 0) + 1 + rows = [(count, module) for module, count in counts.items()] + rows.sort(key=lambda x: (-x[0], x[1])) + return rows + + +def pass_has_no_findings(text: str, parsed_findings: list[dict[str, Any]] | None = None) -> bool: + if NO_FINDINGS_SENTINEL not in text: + return False + if parsed_findings is None: + parsed_findings = parse_findings_from_pass(text, "probe") + return len(parsed_findings) == 0 + + +def rule_block(rule_files: list[str]) -> str: + if not rule_files: + return "Required standards files (read and enforce strictly):\n- (none discovered)" + lines = ["Required standards files (read and enforce strictly):"] + lines.extend([f"- {rule}" for rule in rule_files]) + return "\n".join(lines) + + +def build_diff_context(changed_files: list[str], modules: list[tuple[int, str]], hotspots: list[str]) -> str: + mod_lines = "\n".join([f"- {m} ({c} files)" for c, m in modules]) or "- (none)" + hot_lines = "\n".join([f"- {h}" for h in hotspots]) or "- (none)" + file_lines = "\n".join([f"- {f}" for f in changed_files]) or "- (none)" + return ( + "Change context for breadth/depth coverage:\n" + f"- Changed files count: {len(changed_files)}\n" + "- Changed modules:\n" + f"{mod_lines}\n" + "- 
def domain_prompt(domain: str, profile: dict[str, Any]) -> str:
    """Build the review instructions for one domain pass.

    The generic domain focus text comes first; the profile's custom prompt
    for that domain, when present, is appended on a new line.
    """
    sections = [
        f"Domain focus: {domain}\n"
        f"- identify domain-specific correctness and policy violations for '{domain}'\n"
        f"- prioritize regressions and production-risk behavior in changed code"
    ]
    extra = profile["domain_prompts"].get(domain, "")
    if extra:
        sections.append(extra)
    return "\n".join(sections)


def sanitize_pass_id(value: str) -> str:
    """Turn arbitrary text into an identifier of ``[a-zA-Z0-9_-]`` only.

    Other characters become ``-``; leading/trailing dashes are trimmed and an
    empty result falls back to ``"pass"``.
    """
    cleaned = re.sub(r"[^a-zA-Z0-9_-]", "-", value.strip())
    trimmed = cleaned.strip("-")
    return trimmed if trimmed else "pass"


def shard_files(files: list[str], max_files_per_shard: int, max_shards: int) -> list[list[str]]:
    """Split ``files`` into consecutive shards of bounded size.

    A non-positive ``max_files_per_shard`` (or a short enough list) yields a
    single shard. When ``max_shards`` is positive and would be exceeded, the
    overflow shards are merged into the last kept shard.
    """
    if max_files_per_shard <= 0 or len(files) <= max_files_per_shard:
        return [files]

    shards = [
        files[start : start + max_files_per_shard]
        for start in range(0, len(files), max_files_per_shard)
    ]
    if 0 < max_shards < len(shards):
        head = shards[: max_shards - 1]
        overflow = [name for shard in shards[max_shards - 1 :] for name in shard]
        return head + [overflow]
    return shards


def build_rule_coverage_accounting(
    rule_metadata: list[dict[str, Any]],
    changed_files: list[str],
) -> list[dict[str, Any]]:
    """Classify each rule file by how it applies to the changed files.

    Applicability is ``required`` for always-apply rules, ``applicable`` /
    ``not_applicable`` depending on whether any changed file matches the
    rule's globs, and ``unknown_scope`` when the rule declares no globs.
    """
    accounting: list[dict[str, Any]] = []
    for rule in rule_metadata:
        always_apply = bool(rule.get("always_apply"))
        globs = to_string_list(rule.get("file_globs"), [])
        # With no globs, any() is False for every file, so the list is empty.
        matched_files = [
            candidate
            for candidate in changed_files
            if any(fnmatch.fnmatch(candidate, pattern) for pattern in globs)
        ]

        if always_apply:
            applicability = "required"
        elif not globs:
            applicability = "unknown_scope"
        elif matched_files:
            applicability = "applicable"
        else:
            applicability = "not_applicable"

        accounting.append(
            {
                "rule_file": str(rule.get("path", "")).strip(),
                "always_apply": always_apply,
                "file_globs": globs,
                "domains": to_string_list(rule.get("domains"), []),
                "applicability": applicability,
                "matched_files_count": len(matched_files),
                "matched_files_sample": matched_files[:20],
            }
        )
    return accounting


def finding_mentions_rule_file(finding_rule_text: str, rule_file: str) -> bool:
    """Check whether a finding's rule text refers to ``rule_file``.

    Matching is case-insensitive and succeeds on either the full rule path or
    just its file name.
    """
    haystack = finding_rule_text.strip().lower()
    if not haystack:
        return False
    full_path = rule_file.strip().lower()
    if full_path and full_path in haystack:
        return True
    file_name = Path(rule_file).name.strip().lower()
    if not file_name:
        return False
    return file_name in haystack
+ if kind in {"core", "depth"} or not pass_domains: + relevant_pass_ids.append(pass_id) + continue + if any(domain in pass_domains for domain in rule_domains): + relevant_pass_ids.append(pass_id) + + evidence_count = 0 + for finding in findings: + if finding_mentions_rule_file(str(finding.get("rule", "")), str(row.get("rule_file", ""))): + evidence_count += 1 + + applicability = str(row.get("applicability", "")).strip() + if applicability == "not_applicable": + coverage_status = "not_applicable_by_scope" + elif evidence_count > 0: + coverage_status = "evidenced_in_findings" + elif relevant_pass_ids: + coverage_status = "prompted_no_explicit_evidence" + elif executed_pass_ids: + coverage_status = "not_prompted_for_domain" + else: + coverage_status = "uncovered" + + row["relevant_executed_passes"] = relevant_pass_ids + row["relevant_executed_pass_count"] = len(relevant_pass_ids) + row["executed_pass_count"] = len(executed_pass_ids) + row["evidence_findings_count"] = evidence_count + row["coverage_status"] = coverage_status + + return rows + + +def extract_line_number(raw: str) -> int | None: + match = re.search(r"\d+", raw) + if not match: + return None + try: + number = int(match.group(0)) + except ValueError: + return None + return number if number > 0 else None + + +def normalize_finding_line(raw_line: str) -> str: + line = raw_line.strip() + if not line: + return "" + + # Allow markdown list styles: "-", "*", "1.", "1)". + line = re.sub(r"^[-*+]\s*", "", line) + line = re.sub(r"^\d+[.)]\s*", "", line) + + # Accept markdown-wrapped keys: **Severity:** P1, **Severity**: P1, __Type__: Bug, `Line`: 10. 
+ line = re.sub(r"^\*\*([^*]+)\*\*\s*", r"\1 ", line) + line = re.sub(r"^__([^_]+)__\s*", r"\1 ", line) + line = re.sub(r"^`([^`]+)`\s*", r"\1 ", line) + line = re.sub(r"\s+:\s*", ": ", line, count=1) + return line + + +def parse_findings_from_pass(text: str, pass_id: str) -> list[dict[str, Any]]: + findings: list[dict[str, Any]] = [] + current: dict[str, Any] | None = None + + def flush() -> None: + nonlocal current + if not current: + return + severity = str(current.get("severity", "")).strip().upper() + file_path = str(current.get("file_path", "")).strip() + if severity and file_path: + current["pass_id"] = pass_id + current["line"] = extract_line_number(str(current.get("line_raw", ""))) + findings.append(current) + current = None + + for raw_line in text.splitlines(): + line = normalize_finding_line(raw_line) + if not line: + continue + if NO_FINDINGS_SENTINEL in line: + continue + + severity_match = re.match(r"(?i)^severity\s*:\s*(P[0-3])\b", line) + if severity_match: + flush() + current = { + "severity": severity_match.group(1).upper(), + "type": "", + "file_path": "", + "line_raw": "", + "rule": "", + "risk": "", + "fix": "", + "title": "", + } + continue + + if not current: + continue + + if re.match(r"(?i)^type\s*:", line): + current["type"] = line.split(":", 1)[1].strip() + elif re.match(r"(?i)^(file\s*path|path|file)\s*:", line): + current["file_path"] = line.split(":", 1)[1].strip() + elif re.match(r"(?i)^(line|line\s*number|precise line number|line range)\s*:", line): + current["line_raw"] = line.split(":", 1)[1].strip() + elif re.match(r"(?i)^violated rule", line): + current["rule"] = line.split(":", 1)[1].strip() + elif re.match(r"(?i)^why this is risky\s*:", line): + current["risk"] = line.split(":", 1)[1].strip() + elif re.match(r"(?i)^minimal fix direction\s*:", line): + current["fix"] = line.split(":", 1)[1].strip() + elif re.match(r"(?i)^title\s*:", line): + current["title"] = line.split(":", 1)[1].strip() + else: + # Keep additional context 
attached to risk if available. + if current.get("risk"): + current["risk"] = f"{current['risk']} {line}".strip() + + flush() + return findings + + +def finding_fingerprint(finding: dict[str, Any]) -> str: + raw = "|".join( + [ + str(finding.get("severity", "")).upper(), + str(finding.get("type", "")).lower(), + str(finding.get("file_path", "")).lower(), + str(finding.get("line", "")), + str(finding.get("rule", "")).lower(), + str(finding.get("risk", "")).lower()[:200], + ] + ) + return hashlib.sha1(raw.encode("utf-8")).hexdigest() + + +def parse_date_yyyy_mm_dd(raw: str) -> dt.date | None: + value = raw.strip() + if not value: + return None + try: + return dt.datetime.strptime(value, "%Y-%m-%d").date() + except ValueError: + return None + + +def load_waivers(repo_root: Path, waiver_path: str) -> list[dict[str, Any]]: + path = Path(waiver_path) + if not path.is_absolute(): + path = repo_root / path + if not path.exists(): + return [] + + payload = load_yaml_or_json_any(path) + if isinstance(payload, dict): + rows = payload.get("waivers") + else: + rows = payload + if not isinstance(rows, list): + return [] + + normalized: list[dict[str, Any]] = [] + for row in rows: + if not isinstance(row, dict): + continue + normalized.append(row) + return normalized + + +def load_baseline_fingerprints(repo_root: Path, baseline_path: str) -> set[str]: + path = Path(baseline_path) + if not path.is_absolute(): + path = repo_root / path + if not path.exists(): + return set() + + payload = load_yaml_or_json_any(path) + rows: list[str] = [] + if isinstance(payload, dict): + rows = to_string_list(payload.get("fingerprints"), []) + elif isinstance(payload, list): + rows = [str(x).strip() for x in payload if str(x).strip()] + return {x for x in rows if x} + + +def waiver_matches(finding: dict[str, Any], waiver: dict[str, Any], today: dt.date) -> bool: + owner = str(waiver.get("owner", "")).strip() + reason = str(waiver.get("reason", "")).strip() + if not owner or not reason: + return 
def apply_waivers_and_baseline(
    findings: list[dict[str, Any]],
    waivers: list[dict[str, Any]],
    baseline_fingerprints: set[str],
) -> tuple[list[dict[str, Any]], list[dict[str, Any]], list[dict[str, Any]]]:
    """Partition findings into ``(active, waived, baselined)``.

    Each finding is copied, given a ``fingerprint`` and a ``status``. The
    first waiver that matches wins and is summarized under ``waiver``;
    otherwise a fingerprint present in the baseline marks it ``baselined``;
    everything else stays ``active``. Input findings are not mutated.
    """
    active: list[dict[str, Any]] = []
    waived: list[dict[str, Any]] = []
    baselined: list[dict[str, Any]] = []
    # Timezone-aware replacement for the deprecated datetime.utcnow(); the
    # resulting UTC calendar date is the same.
    today = dt.datetime.now(dt.timezone.utc).date()

    for original in findings:
        finding = dict(original)
        fp = finding_fingerprint(finding)
        finding["fingerprint"] = fp

        matched_waiver = next(
            (waiver for waiver in waivers if waiver_matches(finding, waiver, today)),
            None,
        )
        if matched_waiver is not None:
            finding["waiver"] = {
                "owner": str(matched_waiver.get("owner", "")).strip(),
                "reason": str(matched_waiver.get("reason", "")).strip(),
                "expires_on": str(matched_waiver.get("expires_on", "")).strip(),
            }
            finding["status"] = "waived"
            waived.append(finding)
            continue

        if fp in baseline_fingerprints:
            finding["status"] = "baselined"
            baselined.append(finding)
            continue

        finding["status"] = "active"
        active.append(finding)

    return active, waived, baselined


def findings_to_sarif(findings: list[dict[str, Any]]) -> dict[str, Any]:
    """Convert findings into a SARIF 2.1.0 document with a single run.

    One rule is emitted per distinct finding type (``codexw/<type>``).
    P0/P1 map to ``error``, P2 to ``warning`` and P3 to ``note``; a missing
    or non-positive line is reported as line 1.
    """
    level_map = {"P0": "error", "P1": "error", "P2": "warning", "P3": "note"}
    rules_seen: set[str] = set()
    rules: list[dict[str, Any]] = []
    results: list[dict[str, Any]] = []

    for finding in findings:
        finding_type = str(finding.get("type", "Unknown")).strip() or "Unknown"
        rule_id = f"codexw/{finding_type}"
        if rule_id not in rules_seen:
            rules_seen.add(rule_id)
            rules.append(
                {
                    "id": rule_id,
                    "name": finding_type,
                    "shortDescription": {"text": f"Codexw finding: {finding_type}"},
                }
            )

        line = finding.get("line")
        start_line = int(line) if isinstance(line, int) and line > 0 else 1
        location: dict[str, Any] = {
            "physicalLocation": {
                "artifactLocation": {"uri": str(finding.get("file_path", ""))},
                "region": {"startLine": start_line},
            }
        }

        msg_parts = [
            f"Severity {finding.get('severity', 'P2')}",
            str(finding.get("risk", "")).strip(),
            str(finding.get("fix", "")).strip(),
        ]
        message = " | ".join(part for part in msg_parts if part)

        results.append(
            {
                "ruleId": rule_id,
                "level": level_map.get(str(finding.get("severity", "P2")).upper(), "warning"),
                "message": {"text": message or "Codexw finding"},
                "locations": [location],
            }
        )

    return {
        "version": "2.1.0",
        "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
        "runs": [
            {
                "tool": {"driver": {"name": "codexw", "rules": rules}},
                "results": results,
            }
        ],
    }
+ text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + if proc.returncode != 0: + return None + + raw = proc.stdout.strip() + if not raw: + return None + + ssh_match = re.match(r"git@[^:]+:([^/]+)/(.+?)(?:\.git)?$", raw) + if ssh_match: + return f"{ssh_match.group(1)}/{ssh_match.group(2)}" + + https_match = re.match(r"https?://[^/]+/([^/]+)/(.+?)(?:\.git)?$", raw) + if https_match: + return f"{https_match.group(1)}/{https_match.group(2)}" + return None + + +def resolve_pr_number(repo_root: Path, explicit_pr: str | None) -> str | None: + if explicit_pr: + return explicit_pr + proc = subprocess.run( + ["gh", "pr", "view", "--json", "number", "--jq", ".number"], + cwd=str(repo_root), + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + if proc.returncode != 0: + return None + out = proc.stdout.strip() + return out or None + + +def publish_findings_to_github( + repo_root: Path, + findings: list[dict[str, Any]], + changed_files: list[str], + combined_report: Path, + pr_number: str | None, + review_mode: str, + *, + inline: bool, +) -> None: + if not shutil_which("gh"): + print("warning: gh CLI not found; skipping GitHub publish.", file=sys.stderr) + return + + slug = parse_remote_slug(repo_root) + if not slug: + print("warning: unable to resolve GitHub owner/repo; skipping GitHub publish.", file=sys.stderr) + return + + pr = resolve_pr_number(repo_root, pr_number) + if not pr: + print("warning: unable to resolve PR number; skipping GitHub publish.", file=sys.stderr) + return + + summary_cmd = ["gh", "pr", "comment", pr, "--repo", slug, "--body-file", str(combined_report)] + summary_proc = subprocess.run(summary_cmd, cwd=str(repo_root), text=True) + if summary_proc.returncode != 0: + print("warning: failed to publish summary PR comment via gh.", file=sys.stderr) + return + + if not inline: + return + + if review_mode == "full_repo": + print( + "warning: skipping inline GitHub comments for --full-repo mode; " + "inline API accepts only PR 
diff lines.", + file=sys.stderr, + ) + return + + inline_comments: list[dict[str, Any]] = [] + changed_set = set(changed_files) + for finding in findings: + path = str(finding.get("file_path", "")).strip() + line = finding.get("line") + if not path or not isinstance(line, int) or line <= 0: + continue + if changed_set and path not in changed_set: + continue + body = ( + f"[{finding.get('severity', 'P2')}] {finding.get('type', 'Finding')} - " + f"{finding.get('risk', '')}\n" + f"Fix: {finding.get('fix', '')}" + ).strip() + inline_comments.append({"path": path, "line": line, "side": "RIGHT", "body": body[:5000]}) + if len(inline_comments) >= 30: + break + + if not inline_comments: + return + + payload = {"body": "Codexw inline findings", "event": "COMMENT", "comments": inline_comments} + proc = subprocess.run( + ["gh", "api", "--method", "POST", f"repos/{slug}/pulls/{pr}/reviews", "--input", "-"], + cwd=str(repo_root), + text=True, + input=json.dumps(payload), + ) + if proc.returncode != 0: + print("warning: failed to publish inline review comments via gh api.", file=sys.stderr) + + +def run_review(args: argparse.Namespace) -> int: + repo_root = find_repo_root(Path.cwd()) + os.chdir(repo_root) + + profile_path = Path(args.profile or "local-review-profile.yaml") + if not profile_path.is_absolute(): + profile_path = repo_root / profile_path + + if not profile_path.exists(): + if args.no_bootstrap_profile: + die( + f"profile not found: {profile_path}. " + "Add local-review-profile.yaml in repository root or pass --profile." + ) + bootstrap_profile = build_bootstrap_profile(repo_root) + write_profile(profile_path, bootstrap_profile) + try: + profile_display = str(profile_path.relative_to(repo_root)) + except ValueError: + profile_display = str(profile_path) + print( + f"Generated {profile_display} automatically from repository signals. " + "Review and commit it.", + file=sys.stderr, + ) + + if not profile_path.exists(): + die( + f"profile not found: {profile_path}. 
" + "Add local-review-profile.yaml in repository root or pass --profile." + ) + + if args.sync_profile_only and args.no_sync_profile: + die("--sync-profile-only cannot be combined with --no-sync-profile") + if args.replace_baseline and not args.update_baseline: + die("--replace-baseline requires --update-baseline") + + raw_profile = load_yaml_or_json(profile_path) + if args.no_sync_profile: + synced_profile = raw_profile + else: + synced_profile, was_updated = sync_profile_with_repo( + raw_profile, + repo_root, + prune_autogen=not args.no_prune_autogen, + ) + if was_updated: + write_profile(profile_path, synced_profile) + try: + profile_display = str(profile_path.relative_to(repo_root)) + except ValueError: + profile_display = str(profile_path) + print( + f"Synchronized {profile_display} from repository signals " + f"(prune_autogen={'on' if not args.no_prune_autogen else 'off'}).", + file=sys.stderr, + ) + + profile = normalize_profile(synced_profile) + + if args.print_effective_profile: + print( + json.dumps( + { + "profile_path": str(profile_path), + "repo_root": str(repo_root), + "effective_profile": profile, + }, + indent=2, + sort_keys=True, + ) + ) + return 0 + + if args.bootstrap_only or args.sync_profile_only: + print(f"Profile ready: {profile_path}") + return 0 + + if not shutil_which("codex"): + die("codex CLI not found in PATH") + + mode = "base" + base_branch = args.base or profile["default_base"] + commit_sha = args.commit or "" + if args.full_repo: + mode = "full_repo" + elif args.uncommitted: + mode = "uncommitted" + elif args.commit: + mode = "commit" + + fail_on_findings = profile["strict_gate"] + if args.fail_on_findings: + fail_on_findings = True + if args.no_fail_on_findings: + fail_on_findings = False + + depth_hotspots = args.depth_hotspots if args.depth_hotspots is not None else profile["depth_hotspots"] + max_files_per_shard = ( + args.max_files_per_shard + if args.max_files_per_shard is not None + else profile["max_files_per_shard"] + ) + 
max_shards = args.max_shards if args.max_shards is not None else profile["max_shards"] + parallel_shards = ( + args.parallel_shards if args.parallel_shards is not None else profile["parallel_shards"] + ) + max_passes = args.max_passes if args.max_passes is not None else profile["max_passes"] + time_budget_minutes = ( + args.time_budget_minutes + if args.time_budget_minutes is not None + else profile["time_budget_minutes"] + ) + time_budget_seconds = max(0, time_budget_minutes) * 60 + + allowed_domains = profile["allowed_domains"] + default_domains = profile["default_domains"] + if args.domains: + selected_domains = [d.strip() for d in args.domains.split(",") if d.strip()] + else: + selected_domains = list(default_domains) + + unknown = [d for d in selected_domains if d not in allowed_domains] + if unknown: + die( + f"invalid domain(s): {', '.join(unknown)}. " + f"Allowed: {', '.join(allowed_domains)}" + ) + + ts = dt.datetime.now().strftime("%Y%m%d-%H%M%S") + output_root = Path(args.output_dir) if args.output_dir else Path(profile["output_root"]) / ts + if not output_root.is_absolute(): + output_root = repo_root / output_root + output_root.mkdir(parents=True, exist_ok=True) + + target_args: list[str] = [] + target_desc: str + if mode == "base": + target_args += ["--base", base_branch] + target_desc = f"base branch: {base_branch}" + elif mode == "uncommitted": + target_args += ["--uncommitted"] + target_desc = "uncommitted changes" + elif mode == "commit": + target_args += ["--commit", commit_sha] + target_desc = f"commit: {commit_sha}" + else: + target_desc = "full repository" + + if args.title: + target_args += ["--title", args.title] + + model_override = args.model or "" + if model_override: + target_args += ["-c", f'model="{model_override}"'] + + rule_files = discover_rule_files(repo_root, profile["rule_patterns"]) + rule_metadata = discover_rule_metadata(repo_root, profile["rule_patterns"]) + auto_rule_file = output_root / "enforced-rule-files.txt" + 
auto_rule_file.write_text("\n".join(rule_files) + ("\n" if rule_files else ""), encoding="utf-8") + + changed_files = collect_changed_files(repo_root, mode, base_branch, commit_sha) + changed_files_file = output_root / "changed-files.txt" + changed_files_file.write_text( + "\n".join(changed_files) + ("\n" if changed_files else ""), encoding="utf-8" + ) + + modules = changed_modules(changed_files) + changed_modules_file = output_root / "changed-modules.txt" + changed_modules_file.write_text( + "\n".join([f"{count}\t{module}" for count, module in modules]) + + ("\n" if modules else ""), + encoding="utf-8", + ) + + numstat = collect_numstat( + repo_root, + mode, + base_branch, + commit_sha, + files_for_full_repo=changed_files, + ) + hotspots = [path for _, path in numstat[: depth_hotspots if depth_hotspots > 0 else 0]] + hotspots_file = output_root / "hotspots.txt" + hotspots_file.write_text("\n".join(hotspots) + ("\n" if hotspots else ""), encoding="utf-8") + + if not changed_files: + combined_report = output_root / "combined-report.md" + combined_report.write_text( + "\n".join( + [ + "# Codex PR-Grade Multi-Pass Review", + "", + f"- Generated: {dt.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%SZ')}", + f"- Repository context: {profile['repo_name']}", + f"- Target: {target_desc}", + f"- Domains: {','.join(selected_domains)}", + "- Changed files: 0", + "", + "No files detected for selected target.", + ] + ) + + "\n", + encoding="utf-8", + ) + print("No files detected for selected target.") + print(f"Combined report: {combined_report}") + return 0 + + rule_coverage_rows = build_rule_coverage_accounting(rule_metadata, changed_files) + + base_rubric = ( + f"Act as a strict PR gate reviewer for {profile['repo_name']}.\n" + "Return only actionable findings.\n\n" + "Enforcement order:\n" + "- AGENTS.md instructions\n" + "- Domain-specific internal rule files listed below\n" + "- Engineering correctness and risk\n\n" + "For each finding include:\n" + "- Severity: P0, P1, P2, 
or P3\n" + "- Type: Bug | Regression | Security | Concurrency | TestGap | RuleViolation\n" + "- File path\n" + "- Precise line number or tight line range\n" + "- Violated rule and rule file path (when applicable)\n" + "- Why this is risky\n" + "- Minimal fix direction\n\n" + "Do not output style-only comments unless they violate a required internal rule.\n" + f'If no findings, output exactly: "{NO_FINDINGS_SENTINEL}".' + ) + + global_prompt = profile.get("global_prompt", "") + full_diff_context = build_diff_context(changed_files, modules, hotspots) + rules_block = rule_block(rule_files) + + def pass_prompt(extra: str, *, context_override: str | None = None) -> str: + parts = [base_rubric, rules_block, context_override or full_diff_context] + if global_prompt: + parts.append("Profile global context:\n" + global_prompt) + parts.append(extra) + return "\n\n".join([p for p in parts if p.strip()]) + + pipeline = profile["pipeline"] + passes: list[dict[str, Any]] = [] + pass_counter = 0 + + if pipeline.get("include_policy_pass", True): + pass_counter += 1 + passes.append( + { + "id": f"pass-{pass_counter}-policy-sweep", + "name": "Policy: full standards coverage sweep", + "prompt": pass_prompt(str(pipeline.get("policy_instructions", ""))), + "parallel_group": "", + "kind": "policy", + "domains": list(selected_domains), + } + ) + + if pipeline.get("include_core_passes", True) and "core" in selected_domains: + core_passes = pipeline.get("core_passes") or [] + shards = shard_files(changed_files, max_files_per_shard, max_shards) + for core_pass in core_passes: + pass_id = sanitize_pass_id(str(core_pass.get("id", "core-pass"))) + pass_name = str(core_pass.get("name", pass_id)).strip() or pass_id + instructions = str(core_pass.get("instructions", "")).strip() + if not instructions: + continue + shard_mode = str(core_pass.get("shard", "none")).strip().lower() + if shard_mode == "changed_files" and len(shards) > 1: + for shard_index, shard in enumerate(shards, start=1): + 
shard_modules = changed_modules(shard) + shard_hotspots = [h for h in hotspots if h in set(shard)] + shard_context = build_diff_context(shard, shard_modules, shard_hotspots) + pass_counter += 1 + passes.append( + { + "id": f"pass-{pass_counter}-{pass_id}-shard-{shard_index}", + "name": f"{pass_name} (shard {shard_index}/{len(shards)})", + "prompt": pass_prompt( + "Shard scope:\n" + f"- shard index: {shard_index}/{len(shards)}\n" + "- Review only files in this shard section, while using full repo context for dependencies.\n\n" + + instructions, + context_override=shard_context, + ), + "parallel_group": f"core-{pass_id}-shards", + "kind": "core", + "domains": ["core"], + } + ) + else: + pass_counter += 1 + passes.append( + { + "id": f"pass-{pass_counter}-{pass_id}", + "name": pass_name, + "prompt": pass_prompt(instructions), + "parallel_group": "", + "kind": "core", + "domains": ["core"], + } + ) + + if pipeline.get("include_domain_passes", True): + for domain in selected_domains: + if domain == "core": + continue + pass_counter += 1 + slug = sanitize_pass_id(domain) + passes.append( + { + "id": f"pass-{pass_counter}-domain-{slug}", + "name": f"Domain: {domain}", + "prompt": pass_prompt(domain_prompt(domain, profile)), + "parallel_group": "", + "kind": "domain", + "domains": [domain], + } + ) + + if pipeline.get("include_depth_passes", True): + depth_template = str(pipeline.get("depth_instructions", DEFAULT_DEPTH_PASS_INSTRUCTIONS)) + for hotspot in hotspots: + pass_counter += 1 + hotspot_slug = sanitize_pass_id(hotspot.replace("/", "_")) + try: + depth_instructions = depth_template.format(hotspot=hotspot) + except Exception: + depth_instructions = DEFAULT_DEPTH_PASS_INSTRUCTIONS.format(hotspot=hotspot) + passes.append( + { + "id": f"pass-{pass_counter}-depth-{hotspot_slug}", + "name": f"Depth hotspot: {hotspot}", + "prompt": pass_prompt(depth_instructions), + "parallel_group": "", + "kind": "depth", + "domains": list(selected_domains), + } + ) + + if not passes: + 
die("no review passes configured; check profile.pipeline settings") + + run_started = time.monotonic() + executed_passes: dict[str, dict[str, Any]] = {} + skipped_passes: dict[str, str] = {} + + def run_one_pass(pass_spec: dict[str, Any], ordinal: int, total: int, *, stream_output: bool) -> dict[str, Any]: + pass_id = str(pass_spec["id"]) + pass_name = str(pass_spec["name"]) + prompt = str(pass_spec["prompt"]) + out_file = output_root / f"{pass_id}.md" + print(f"\n==> ({ordinal}/{total}) {pass_name}") + run_review_pass_with_compat( + repo_root=repo_root, + out_file=out_file, + target_args=target_args, + target_desc=target_desc, + prompt=prompt, + pass_name=pass_name, + stream_output=stream_output, + ) + text = out_file.read_text(encoding="utf-8", errors="replace") + parsed = parse_findings_from_pass(text, pass_id) + no_findings = pass_has_no_findings(text, parsed) + if not no_findings and not parsed: + parsed = [ + { + "severity": "P2", + "type": "UnparsedFinding", + "file_path": "(unparsed-output)", + "line_raw": "", + "line": None, + "rule": "", + "risk": "Pass output contained findings but did not match structured schema.", + "fix": "Ensure findings follow the required schema with Severity/Type/File path/Line fields.", + "title": pass_name, + "pass_id": pass_id, + } + ] + return { + "id": pass_id, + "name": pass_name, + "out_file": str(out_file), + "parsed_findings": parsed, + "no_findings": no_findings, + } + + index = 0 + executed_count = 0 + while index < len(passes): + if max_passes > 0 and executed_count >= max_passes: + for pass_spec in passes[index:]: + skipped_passes[str(pass_spec["id"])] = "max_passes" + break + + elapsed = time.monotonic() - run_started + if time_budget_seconds > 0 and elapsed >= time_budget_seconds: + for pass_spec in passes[index:]: + skipped_passes[str(pass_spec["id"])] = "time_budget" + break + + current = passes[index] + group = str(current.get("parallel_group", "")).strip() + if group and parallel_shards > 1: + batch: 
list[dict[str, Any]] = [] + cursor = index + while cursor < len(passes): + candidate = passes[cursor] + if str(candidate.get("parallel_group", "")).strip() != group: + break + batch.append(candidate) + cursor += 1 + + if max_passes > 0: + remaining = max_passes - executed_count + if remaining <= 0: + for pass_spec in batch: + skipped_passes[str(pass_spec["id"])] = "max_passes" + index = cursor + continue + if len(batch) > remaining: + for pass_spec in batch[remaining:]: + skipped_passes[str(pass_spec["id"])] = "max_passes" + batch = batch[:remaining] + + with ThreadPoolExecutor(max_workers=parallel_shards) as executor: + future_map = { + executor.submit( + run_one_pass, + pass_spec, + (index + offset + 1), + len(passes), + stream_output=False, + ): pass_spec + for offset, pass_spec in enumerate(batch) + } + for future in as_completed(future_map): + result = future.result() + executed_passes[result["id"]] = result + executed_count += 1 + + index = cursor + continue + + result = run_one_pass(current, index + 1, len(passes), stream_output=True) + executed_passes[result["id"]] = result + executed_count += 1 + index += 1 + + summary_lines: list[str] = [] + raw_findings: list[dict[str, Any]] = [] + executed_pass_ids = [p["id"] for p in passes if p["id"] in executed_passes] + for pass_spec in passes: + pass_id = str(pass_spec["id"]) + pass_name = str(pass_spec["name"]) + if pass_id in executed_passes: + result = executed_passes[pass_id] + if result["no_findings"]: + summary_lines.append(f"- [PASS] {pass_name}") + else: + summary_lines.append(f"- [FINDINGS] {pass_name}") + raw_findings.extend(result["parsed_findings"]) + else: + reason = skipped_passes.get(pass_id, "skipped") + summary_lines.append(f"- [SKIPPED:{reason}] {pass_name}") + + executed_pass_specs = [spec for spec in passes if str(spec.get("id", "")) in set(executed_pass_ids)] + rule_coverage_rows = evaluate_rule_coverage_rows( + rows=rule_coverage_rows, + executed_passes=executed_pass_specs, + 
findings=raw_findings, + ) + + rule_coverage_json = output_root / "rule-coverage-accounting.json" + rule_coverage_json.write_text(json.dumps(rule_coverage_rows, indent=2) + "\n", encoding="utf-8") + rule_coverage_md = output_root / "rule-coverage-accounting.md" + with rule_coverage_md.open("w", encoding="utf-8") as fh: + fh.write("# Rule Coverage Accounting\n\n") + for row in rule_coverage_rows: + fh.write( + f"- {row['rule_file']} :: applicability={row['applicability']} " + f":: status={row['coverage_status']} " + f":: relevant_passes={row['relevant_executed_pass_count']} " + f":: evidence_findings={row['evidence_findings_count']} " + f":: matched_files={row['matched_files_count']}\n" + ) + + waiver_file = str(args.waiver_file or profile["waiver_file"]).strip() or DEFAULT_WAIVER_FILE + baseline_file = str(args.baseline_file or profile["baseline_file"]).strip() or DEFAULT_BASELINE_FILE + waivers = load_waivers(repo_root, waiver_file) + baseline_fingerprints = load_baseline_fingerprints(repo_root, baseline_file) + + active_findings, waived_findings, baselined_findings = apply_waivers_and_baseline( + raw_findings, + waivers, + baseline_fingerprints, + ) + + if args.update_baseline: + baseline_path = Path(baseline_file) + if not baseline_path.is_absolute(): + baseline_path = repo_root / baseline_path + baseline_path.parent.mkdir(parents=True, exist_ok=True) + new_fingerprint_set = {finding["fingerprint"] for finding in active_findings} + if args.replace_baseline: + final_fingerprints = sorted(new_fingerprint_set) + baseline_mode = "replace" + else: + final_fingerprints = sorted(set(baseline_fingerprints) | new_fingerprint_set) + baseline_mode = "merge" + baseline_payload = { + "updated_utc": dt.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"), + "mode": baseline_mode, + "fingerprints": final_fingerprints, + } + baseline_path.write_text(json.dumps(baseline_payload, indent=2) + "\n", encoding="utf-8") + print( + f"Updated baseline file ({baseline_mode}): {baseline_path} 
" + f"[count={len(final_fingerprints)}]" + ) + + findings_json = output_root / "findings.json" + findings_payload = { + "generated_utc": dt.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"), + "target": target_desc, + "counts": { + "raw": len(raw_findings), + "active": len(active_findings), + "waived": len(waived_findings), + "baselined": len(baselined_findings), + }, + "active_findings": active_findings, + "waived_findings": waived_findings, + "baselined_findings": baselined_findings, + } + findings_json.write_text(json.dumps(findings_payload, indent=2) + "\n", encoding="utf-8") + + sarif_file = output_root / "findings.sarif" + sarif_file.write_text(json.dumps(findings_to_sarif(active_findings), indent=2) + "\n", encoding="utf-8") + + summary_file = output_root / "pass-status.md" + summary_file.write_text("\n".join(summary_lines) + "\n", encoding="utf-8") + + combined_report = output_root / "combined-report.md" + with combined_report.open("w", encoding="utf-8") as fh: + try: + profile_display = str(profile_path.relative_to(repo_root)) + except ValueError: + profile_display = str(profile_path) + + fh.write("# Codex PR-Grade Multi-Pass Review\n\n") + fh.write(f"- Generated: {dt.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%SZ')}\n") + fh.write(f"- Repository context: {profile['repo_name']}\n") + fh.write(f"- Target: {target_desc}\n") + fh.write(f"- Domains: {','.join(selected_domains)}\n") + fh.write(f"- Auto-enforced rule files: {len(rule_files)}\n") + fh.write(f"- Changed files: {len(changed_files)}\n") + fh.write(f"- Depth hotspots: {depth_hotspots}\n") + fh.write(f"- Pass count (planned/executed): {len(passes)}/{len(executed_pass_ids)}\n") + fh.write(f"- Waiver file: {waiver_file}\n") + fh.write(f"- Baseline file: {baseline_file}\n") + if args.title: + fh.write(f"- Title: {args.title}\n") + if model_override: + fh.write(f"- Model override: {model_override}\n") + fh.write(f"- Profile file: {profile_display}\n\n") + + fh.write("## Findings Summary\n\n") + 
fh.write(f"- Raw findings: {len(raw_findings)}\n") + fh.write(f"- Active findings: {len(active_findings)}\n") + fh.write(f"- Waived findings: {len(waived_findings)}\n") + fh.write(f"- Baselined findings: {len(baselined_findings)}\n") + fh.write(f"- JSON artifact: {findings_json}\n") + fh.write(f"- SARIF artifact: {sarif_file}\n\n") + + fh.write("## Pass Status\n\n") + fh.write("\n".join(summary_lines) + "\n\n") + + fh.write("## Rule Coverage Accounting\n\n") + fh.write(f"- JSON: {rule_coverage_json}\n") + fh.write(f"- Markdown: {rule_coverage_md}\n\n") + + fh.write("## Auto-Enforced Rule Files\n\n") + if rule_files: + fh.write("\n".join(rule_files) + "\n\n") + else: + fh.write("(none discovered)\n\n") + + fh.write("## Changed Modules\n\n") + if modules: + fh.write("\n".join([f"{count}\t{module}" for count, module in modules]) + "\n\n") + else: + fh.write("(none)\n\n") + + fh.write("## Changed Files\n\n") + fh.write("\n".join(changed_files) + "\n\n") + + fh.write("## Hotspots\n\n") + fh.write(("\n".join(hotspots) if hotspots else "(none)") + "\n\n") + + for pass_file in sorted(output_root.glob("pass-*.md")): + fh.write(f"## {pass_file.stem}\n\n") + pass_text = pass_file.read_text(encoding="utf-8") + fh.write(pass_text) + if not pass_text.endswith("\n"): + fh.write("\n") + fh.write("\n") + + if args.publish_github: + publish_findings_to_github( + repo_root=repo_root, + findings=active_findings, + changed_files=changed_files, + combined_report=combined_report, + pr_number=args.github_pr, + review_mode=mode, + inline=args.github_inline, + ) + + print("\nDone.") + print(f"Per-pass outputs: {output_root}") + print(f"Combined report: {combined_report}") + + if active_findings: + print("Status: active findings detected.") + if fail_on_findings: + print("Exiting non-zero because fail-on-findings is enabled.", file=sys.stderr) + return 2 + else: + print("Status: no active findings in executed passes.") + + return 0 + + +def shutil_which(name: str) -> str | None: + paths = 
os.environ.get("PATH", "").split(os.pathsep) + for directory in paths: + candidate = Path(directory) / name + if candidate.exists() and os.access(candidate, os.X_OK): + return str(candidate) + return None + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="codexw", + description="Generic, profile-aware Codex wrapper for local PR-grade review.", + ) + sub = parser.add_subparsers(dest="command") + + review = sub.add_parser( + "review", + help="Run profile-driven PR-grade multi-pass review.", + ) + review_pr = sub.add_parser( + "review-pr", + help="Alias for 'review' (kept for backward compatibility).", + ) + + def add_review_args(target_parser: argparse.ArgumentParser) -> None: + target_parser.add_argument("--profile", help="Path to local-review-profile.yaml", default=None) + mode = target_parser.add_mutually_exclusive_group() + mode.add_argument("--base", help="Base branch", default=None) + mode.add_argument("--uncommitted", action="store_true", help="Review uncommitted changes") + mode.add_argument("--commit", help="Review a specific commit SHA", default=None) + mode.add_argument("--full-repo", action="store_true", help="Review full repository files") + target_parser.add_argument("--domains", help="Comma-separated domain list", default=None) + target_parser.add_argument("--depth-hotspots", type=int, help="Number of hotspot depth passes") + target_parser.add_argument( + "--max-files-per-shard", + type=int, + help="Shard breadth passes when changed files exceed this count (0 disables)", + ) + target_parser.add_argument("--max-shards", type=int, help="Max shards for breadth passes") + target_parser.add_argument( + "--parallel-shards", + type=int, + help="Execute shard passes in parallel with this worker count", + ) + target_parser.add_argument("--max-passes", type=int, help="Hard cap on executed passes") + target_parser.add_argument( + "--time-budget-minutes", + type=int, + help="Stop scheduling new passes after this runtime 
budget", + ) + target_parser.add_argument("--title", help="Optional review title", default=None) + target_parser.add_argument("--output-dir", help="Output directory for artifacts", default=None) + target_parser.add_argument("--model", help="Optional model override", default=None) + target_parser.add_argument("--waiver-file", help="Override waiver config path", default=None) + target_parser.add_argument("--baseline-file", help="Override baseline file path", default=None) + target_parser.add_argument( + "--update-baseline", + action="store_true", + help="Update baseline file from active findings (merge by default)", + ) + target_parser.add_argument( + "--replace-baseline", + action="store_true", + help="When used with --update-baseline, replace baseline instead of merge", + ) + target_parser.add_argument( + "--print-effective-profile", + action="store_true", + help="Print normalized profile and exit (no review execution)", + ) + target_parser.add_argument( + "--bootstrap-only", + action="store_true", + help="Create missing profile (if needed) and exit", + ) + target_parser.add_argument( + "--sync-profile-only", + action="store_true", + help="Sync profile from repository signals and exit", + ) + target_parser.add_argument( + "--no-bootstrap-profile", + action="store_true", + help="Disable automatic profile generation when missing", + ) + target_parser.add_argument( + "--no-sync-profile", + action="store_true", + help="Disable automatic profile sync from repository signals", + ) + target_parser.add_argument( + "--no-prune-autogen", + action="store_true", + help="Keep previously auto-managed entries even if no longer inferred", + ) + target_parser.add_argument("--fail-on-findings", action="store_true", help="Force strict gate") + target_parser.add_argument( + "--no-fail-on-findings", + action="store_true", + help="Exploratory mode; do not fail when findings exist", + ) + target_parser.add_argument( + "--publish-github", + action="store_true", + help="Publish review 
summary to current GitHub PR using gh CLI", + ) + target_parser.add_argument( + "--github-pr", + help="Explicit GitHub PR number for publish step", + default=None, + ) + target_parser.add_argument( + "--github-inline", + action="store_true", + help="Also publish inline PR review comments when line/path are available", + ) + + add_review_args(review) + add_review_args(review_pr) + + return parser + + +def main() -> int: + parser = build_parser() + args = parser.parse_args() + + if args.command in {"review", "review-pr"}: + return run_review(args) + + parser.print_help() + return 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/local-review-profile.example.yaml b/local-review-profile.example.yaml new file mode 100644 index 0000000..1bd7e06 --- /dev/null +++ b/local-review-profile.example.yaml @@ -0,0 +1,76 @@ +version: 1 + +repo: + name: Duolingo Android + +review: + default_base: master + strict_gate: true + depth_hotspots: 3 + output_root: .codex/review-runs + max_files_per_shard: 40 + max_shards: 5 + parallel_shards: 1 + max_passes: 0 + time_budget_minutes: 0 + waiver_file: .codex/review-waivers.yaml + baseline_file: .codex/review-baseline.json + +rules: + include: + - AGENTS.md + - .cursor/rules/**/*.mdc + +domains: + default: + - core + - experiments + - compose + - coroutines + - testing + allowed: + - core + - experiments + - compose + - coroutines + - testing + +prompts: + global: | + Prioritize behavior-changing issues over style concerns. + by_domain: + experiments: | + Focus on experiment gating, control/treatment leakage, and cleanup correctness. + compose: | + Focus on design-system usage, state/recomposition behavior, and lifecycle correctness. + coroutines: | + Focus on structured concurrency, cancellation, dispatcher correctness, and interop risks. + testing: | + Focus on missing tests for high-risk behavior and weak assertions. 
+ +pipeline: + include_policy_pass: true + include_core_passes: true + include_domain_passes: true + include_depth_passes: true + core_passes: + - id: core-breadth + name: Core breadth coverage + shard: changed_files + instructions: | + Review every changed file in this shard and report actionable findings only. + - id: core-regressions + name: Core regressions + shard: none + instructions: | + Focus on behavioral regressions, crash/nullability, and security/privacy issues. + - id: core-architecture + name: Core architecture + shard: none + instructions: | + Focus on architecture boundaries, concurrency, and lifecycle correctness. + - id: core-tests + name: Core tests + shard: none + instructions: | + Focus on missing tests and high-risk edge cases without coverage. From 9bf68c3ce85046832379db23e29954ffbe5ef4c8 Mon Sep 17 00:00:00 2001 From: pavan Date: Thu, 19 Feb 2026 21:50:08 +0530 Subject: [PATCH 2/7] Trim codexw to essentials-only local review workflow --- README.md | 29 +- codexw | 1117 +++-------------------------- local-review-profile.example.yaml | 11 - 3 files changed, 108 insertions(+), 1049 deletions(-) diff --git a/README.md b/README.md index 78b4849..3825573 100644 --- a/README.md +++ b/README.md @@ -82,11 +82,10 @@ Canonical command is `codexw review`; `codexw review-pr` is kept as a compatibil If profile is missing, `codexw` auto-generates `local-review-profile.yaml` on first run. On each run, `codexw` auto-syncs profile entries derived from repository signals (rules/domains/domain prompts) while preserving manual overrides. Stale auto-managed entries are pruned when source-of-truth changes. 
-PR-grade outputs now include: -- deterministic rule-coverage accounting (`rule-coverage-accounting.json`) -- machine-readable findings (`findings.json`, `findings.sarif`) -- waiver + baseline filtering (for strict gate on net-new active findings) -- optional GitHub publish adapter (`--publish-github`, optional inline comments) +PR-grade outputs include: +- pass-level markdown reports +- combined markdown report (`combined-report.md`) +- machine-readable findings (`findings.json`) **Prerequisites:** - Install Codex CLI: `brew install codex` or `npm install -g @openai/codex` @@ -107,28 +106,16 @@ Direct execution (without pre-commit): ./codexw review ./codexw review --base main ./codexw review --domains core,testing --no-fail-on-findings -./codexw review --full-repo --max-files-per-shard 50 --parallel-shards 2 -# Runtime budget controls -./codexw review --max-passes 8 --time-budget-minutes 12 # Create missing profile and exit ./codexw review --bootstrap-only # Sync profile from repository signals and exit ./codexw review --sync-profile-only -# Use waivers/baseline files and update baseline from current active findings -./codexw review --waiver-file .codex/review-waivers.yaml --baseline-file .codex/review-baseline.json -./codexw review --update-baseline --no-fail-on-findings -# Replace baseline instead of merge -./codexw review --update-baseline --replace-baseline --no-fail-on-findings # Validate profile loading only (no Codex run) ./codexw review --print-effective-profile # Disable profile sync for one run ./codexw review --no-sync-profile # Keep stale auto-managed profile entries for this run ./codexw review --no-prune-autogen -# Publish summary (and optional inline comments) to current PR -./codexw review --publish-github -./codexw review --publish-github --github-inline -# Note: inline comments are automatically skipped in --full-repo mode ``` `local-review-profile.yaml` schema (minimum practical shape): @@ -143,13 +130,6 @@ review: strict_gate: true 
depth_hotspots: 3 output_root: .codex/review-runs - max_files_per_shard: 40 - max_shards: 5 - parallel_shards: 1 - max_passes: 0 # 0 = unlimited - time_budget_minutes: 0 # 0 = unlimited - waiver_file: .codex/review-waivers.yaml - baseline_file: .codex/review-baseline.json rules: include: @@ -177,7 +157,6 @@ pipeline: core_passes: - id: core-breadth name: Core breadth - shard: changed_files # none | changed_files instructions: | Custom breadth pass instructions. depth_instructions: | diff --git a/codexw b/codexw index 8cc8a50..657c68e 100755 --- a/codexw +++ b/codexw @@ -1,21 +1,17 @@ #!/usr/bin/env python3 -"""Generic Codex PR-grade review wrapper (profile-aware).""" +"""Generic Codex PR-grade review wrapper (profile-aware, essentials-only).""" from __future__ import annotations import argparse import datetime as dt -import fnmatch import glob -import hashlib import json import os import re import shlex import subprocess import sys -import time -from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path from typing import Any @@ -82,7 +78,6 @@ DEFAULT_CORE_PASS_SPECS: list[dict[str, str]] = [ "- Then output actionable findings using the required schema.\n" f"- If no actionable findings exist, include exactly this line: {NO_FINDINGS_SENTINEL}" ), - "shard": "changed_files", }, { "id": "core-regressions", @@ -94,7 +89,6 @@ DEFAULT_CORE_PASS_SPECS: list[dict[str, str]] = [ "- state corruption and data-loss risks\n" "- security and privacy issues" ), - "shard": "none", }, { "id": "core-architecture", @@ -106,7 +100,6 @@ DEFAULT_CORE_PASS_SPECS: list[dict[str, str]] = [ "- error-handling/fallback correctness\n" "- protocol/contract boundary failures" ), - "shard": "none", }, { "id": "core-tests", @@ -117,7 +110,6 @@ DEFAULT_CORE_PASS_SPECS: list[dict[str, str]] = [ "- high-risk edge cases without coverage\n" "- regressions likely to escape without tests" ), - "shard": "none", }, ] @@ -130,10 +122,6 @@ DEFAULT_DEPTH_PASS_INSTRUCTIONS = ( f"- 
If no actionable findings exist, include exactly this line: {NO_FINDINGS_SENTINEL}" ) -DEFAULT_WAIVER_FILE = ".codex/review-waivers.yaml" -DEFAULT_BASELINE_FILE = ".codex/review-baseline.json" - - def die(message: str, code: int = 1) -> None: print(f"error: {message}", file=sys.stderr) @@ -158,22 +146,6 @@ def run_checked(cmd: list[str], cwd: Path) -> str: return proc.stdout -def run_streaming(cmd: list[str], cwd: Path, out_file: Path) -> int: - with out_file.open("w", encoding="utf-8") as fh: - proc = subprocess.Popen( - cmd, - cwd=str(cwd), - text=True, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - ) - assert proc.stdout is not None - for line in proc.stdout: - print(line, end="") - fh.write(line) - return proc.wait() - - def run_captured(cmd: list[str], cwd: Path, out_file: Path, *, stream_output: bool) -> int: proc = subprocess.run( cmd, @@ -196,12 +168,9 @@ def run_review_pass_with_compat( target_desc: str, prompt: str, pass_name: str, - *, - stream_output: bool = True, ) -> None: - """Run one pass, with fallback for Codex CLI versions that reject prompt+target flags.""" primary_cmd = ["codex", "review", *target_args, prompt] - exit_code = run_captured(primary_cmd, repo_root, out_file, stream_output=stream_output) + exit_code = run_captured(primary_cmd, repo_root, out_file, stream_output=True) if exit_code == 0: return @@ -219,7 +188,7 @@ def run_review_pass_with_compat( "Apply review findings to the requested target using the repository context below." ) compat_cmd = ["codex", "review", f"{compat_prefix}\n\n{prompt}"] - exit_code = run_captured(compat_cmd, repo_root, out_file, stream_output=stream_output) + exit_code = run_captured(compat_cmd, repo_root, out_file, stream_output=True) if exit_code == 0: return @@ -248,7 +217,6 @@ def git_ref_exists(repo_root: Path, ref: str) -> bool: def detect_default_base(repo_root: Path) -> str: - # Prefer local branch refs first, then origin refs. 
for candidate in ("master", "main"): if git_ref_exists(repo_root, f"refs/heads/{candidate}"): return candidate @@ -311,7 +279,6 @@ def parse_yaml_mapping_fragment(raw: str) -> dict[str, Any]: except Exception: return {} - # Lightweight fallback parser for simple key/value YAML. parsed: dict[str, Any] = {} current_key: str | None = None for raw_line in text.splitlines(): @@ -388,29 +355,6 @@ def _to_boolish(value: Any) -> bool | None: return None -def _extract_rule_globs(meta: dict[str, Any]) -> list[str]: - globs: list[str] = [] - candidates: list[Any] = [ - meta.get("file_scope"), - meta.get("fileScope"), - meta.get("scope"), - meta.get("scopes"), - meta.get("globs"), - meta.get("files"), - meta.get("include"), - ] - for candidate in candidates: - for raw_item in to_string_list(candidate, []): - item = raw_item.strip() - if not item: - continue - lowered = item.lower() - if lowered in {"all files", "all", "*"}: - continue - globs.append(item) - return _unique(globs) - - def _extract_rule_domains(meta: dict[str, Any], rel_path: str) -> list[str]: domains: list[str] = [] domain_candidates = [ @@ -452,14 +396,14 @@ def discover_rule_metadata(repo_root: Path, patterns: list[str]) -> list[dict[st if always_apply is None: always_apply = _to_boolish(meta.get("alwaysApply")) description = str(meta.get("description", "")).strip() - row = { - "path": rel, - "always_apply": bool(always_apply) if always_apply is not None else False, - "file_globs": _extract_rule_globs(meta), - "domains": _extract_rule_domains(meta, rel), - "description": description, - } - rows.append(row) + rows.append( + { + "path": rel, + "always_apply": bool(always_apply) if always_apply is not None else False, + "domains": _extract_rule_domains(meta, rel), + "description": description, + } + ) return rows @@ -469,7 +413,6 @@ def infer_domains_from_rule_metadata(rule_metadata: list[dict[str, Any]]) -> lis for domain in to_string_list(row.get("domains"), []): domains.add(domain) - # Keep known domains 
first, then repo-specific domains. preferred = ["core", "experiments", "compose", "coroutines", "testing"] result = [d for d in preferred if d in domains] for domain in sorted(domains): @@ -508,13 +451,6 @@ def build_bootstrap_profile(repo_root: Path) -> dict[str, Any]: "strict_gate": True, "depth_hotspots": 3, "output_root": ".codex/review-runs", - "max_files_per_shard": 40, - "max_shards": 5, - "parallel_shards": 1, - "max_passes": 0, - "time_budget_minutes": 0, - "waiver_file": DEFAULT_WAIVER_FILE, - "baseline_file": DEFAULT_BASELINE_FILE, }, "rules": {"include": rule_patterns}, "domains": {"default": domains, "allowed": domains}, @@ -528,7 +464,6 @@ def build_bootstrap_profile(repo_root: Path) -> dict[str, Any]: def write_profile(path: Path, profile: dict[str, Any]) -> None: path.parent.mkdir(parents=True, exist_ok=True) - # Write JSON-compatible YAML to avoid hard dependency on PyYAML. path.write_text(json.dumps(profile, indent=2) + "\n", encoding="utf-8") @@ -562,7 +497,6 @@ def sync_profile_with_repo( *, prune_autogen: bool, ) -> tuple[dict[str, Any], bool]: - """Merge repository-derived signals into profile while preserving manual overrides.""" before = _stable(raw_profile) profile: dict[str, Any] = json.loads(json.dumps(raw_profile)) inferred = build_bootstrap_profile(repo_root) @@ -593,20 +527,6 @@ def sync_profile_with_repo( review["depth_hotspots"] = 3 if not str(review.get("output_root", "")).strip(): review["output_root"] = ".codex/review-runs" - if "max_files_per_shard" not in review: - review["max_files_per_shard"] = 40 - if "max_shards" not in review: - review["max_shards"] = 5 - if "parallel_shards" not in review: - review["parallel_shards"] = 1 - if "max_passes" not in review: - review["max_passes"] = 0 - if "time_budget_minutes" not in review: - review["time_budget_minutes"] = 0 - if not str(review.get("waiver_file", "")).strip(): - review["waiver_file"] = DEFAULT_WAIVER_FILE - if not str(review.get("baseline_file", "")).strip(): - 
review["baseline_file"] = DEFAULT_BASELINE_FILE rules = _ensure_dict(profile, "rules") existing_patterns = to_string_list(rules.get("include"), []) @@ -614,8 +534,7 @@ def sync_profile_with_repo( if prune_autogen and prev_autogen_rules: prev_rule_set = set(prev_autogen_rules) existing_patterns = [p for p in existing_patterns if p not in prev_rule_set] - merged_patterns = _unique(existing_patterns + inferred_patterns) - rules["include"] = merged_patterns + rules["include"] = _unique(existing_patterns + inferred_patterns) domains = _ensure_dict(profile, "domains") existing_allowed = to_string_list(domains.get("allowed"), []) @@ -625,10 +544,9 @@ def sync_profile_with_repo( prev_domain_set = set(prev_autogen_domains) existing_allowed = [d for d in existing_allowed if d not in prev_domain_set] existing_default = [d for d in existing_default if d not in prev_domain_set] + merged_allowed = _unique(existing_allowed + inferred_domains) merged_default = _unique(existing_default + inferred_domains) - - # Default must be subset of allowed. merged_default = [d for d in merged_default if d in set(merged_allowed)] if not merged_allowed: merged_allowed = ["core"] @@ -641,6 +559,7 @@ def sync_profile_with_repo( prompts = _ensure_dict(profile, "prompts") if not str(prompts.get("global", "")).strip(): prompts["global"] = inferred["prompts"]["global"] + by_domain = prompts.get("by_domain") if not isinstance(by_domain, dict): by_domain = {} @@ -650,17 +569,13 @@ def sync_profile_with_repo( for domain in merged_allowed: if domain not in inferred_by_domain: continue - inferred_prompt = inferred_by_domain[domain] existing_prompt = str(by_domain.get(domain, "")).strip() prev_prompt = str(prev_autogen_prompt_map.get(domain, "")).strip() - - # Add missing template prompts; refresh only if this entry was auto-managed and unchanged. 
if not existing_prompt: by_domain[domain] = inferred_prompt elif prev_prompt and existing_prompt == prev_prompt and existing_prompt != inferred_prompt: by_domain[domain] = inferred_prompt - new_autogen_prompt_map[domain] = inferred_prompt if prune_autogen: @@ -700,8 +615,6 @@ def sync_profile_with_repo( if domain in inferred_by_domain } else: - # Preserve full auto-managed provenance when prune is disabled so future prune runs can still - # remove stale entries safely. autogen["rules_include"] = _unique(prev_autogen_rules + inferred_patterns) autogen["domains"] = _unique(prev_autogen_domains + inferred_domains) preserved_prompt_map = dict(prev_autogen_prompt_map) @@ -722,7 +635,6 @@ def sync_profile_with_repo( def load_yaml_or_json(path: Path) -> dict[str, Any]: text = path.read_text(encoding="utf-8") - # Preferred: PyYAML try: import yaml # type: ignore @@ -735,7 +647,6 @@ def load_yaml_or_json(path: Path) -> dict[str, Any]: except Exception as exc: die(f"invalid YAML in {path}: {exc}") - # Fallback: JSON (YAML is superset of JSON) try: data = json.loads(text) except json.JSONDecodeError: @@ -748,27 +659,6 @@ def load_yaml_or_json(path: Path) -> dict[str, Any]: return data -def load_yaml_or_json_any(path: Path) -> Any: - text = path.read_text(encoding="utf-8") - - try: - import yaml # type: ignore - - return yaml.safe_load(text) - except ModuleNotFoundError: - pass - except Exception as exc: - die(f"invalid YAML in {path}: {exc}") - - try: - return json.loads(text) - except json.JSONDecodeError: - die( - f"{path} is not valid JSON-compatible YAML. " - "Install PyYAML (python3 -m pip install pyyaml) or provide JSON syntax." 
- ) - - def to_bool(value: Any, default: bool) -> bool: if value is None: return default @@ -799,7 +689,6 @@ def to_string_list(value: Any, default: list[str] | None = None) -> list[str]: if isinstance(value, list): return [str(x).strip() for x in value if str(x).strip()] if isinstance(value, str): - # Support comma-separated shorthand return [x.strip() for x in value.split(",") if x.strip()] return list(default or []) @@ -827,10 +716,10 @@ def normalize_profile(raw: dict[str, Any]) -> dict[str, Any]: allowed_domains = to_string_list(domains.get("allowed"), ["core"]) default_domains = to_string_list(domains.get("default"), allowed_domains) - if not default_domains: - default_domains = list(allowed_domains) if not allowed_domains: allowed_domains = ["core"] + if not default_domains: + default_domains = list(allowed_domains) domain_prompt_map = prompts.get("by_domain") if not isinstance(domain_prompt_map, dict): @@ -850,22 +739,18 @@ def normalize_profile(raw: dict[str, Any]) -> dict[str, Any]: instructions = str(raw_pass.get("instructions", "")).strip() if not instructions: continue - shard_mode = str(raw_pass.get("shard", "none")).strip().lower() - if shard_mode not in {"none", "changed_files"}: - shard_mode = "none" pipeline_core_passes.append( { "id": pass_id, "name": pass_name, "instructions": instructions, - "shard": shard_mode, } ) if not pipeline_core_passes: pipeline_core_passes = json.loads(json.dumps(default_pipeline["core_passes"])) - normalized = { + return { "version": str(raw.get("version", "1")), "repo_name": str(repo.get("name", "Repository")).strip() or "Repository", "default_base": str(review.get("default_base", "main")).strip() or "main", @@ -873,18 +758,7 @@ def normalize_profile(raw: dict[str, Any]) -> dict[str, Any]: "depth_hotspots": to_int(review.get("depth_hotspots"), 3), "output_root": str(review.get("output_root", ".codex/review-runs")).strip() or ".codex/review-runs", - "max_files_per_shard": to_int(review.get("max_files_per_shard"), 40), 
- "max_shards": to_int(review.get("max_shards"), 5), - "parallel_shards": max(1, to_int(review.get("parallel_shards"), 1)), - "max_passes": to_int(review.get("max_passes"), 0), - "time_budget_minutes": to_int(review.get("time_budget_minutes"), 0), - "waiver_file": str(review.get("waiver_file", DEFAULT_WAIVER_FILE)).strip() - or DEFAULT_WAIVER_FILE, - "baseline_file": str(review.get("baseline_file", DEFAULT_BASELINE_FILE)).strip() - or DEFAULT_BASELINE_FILE, - "rule_patterns": to_string_list( - rules.get("include"), ["AGENTS.md", ".cursor/rules/**/*.mdc"] - ), + "rule_patterns": to_string_list(rules.get("include"), ["AGENTS.md", ".cursor/rules/**/*.mdc"]), "default_domains": default_domains, "allowed_domains": allowed_domains, "global_prompt": str(prompts.get("global", "")).strip(), @@ -919,7 +793,6 @@ def normalize_profile(raw: dict[str, Any]) -> dict[str, Any]: or default_pipeline["depth_instructions"], }, } - return normalized def discover_rule_files(repo_root: Path, patterns: list[str]) -> list[str]: @@ -928,67 +801,24 @@ def discover_rule_files(repo_root: Path, patterns: list[str]) -> list[str]: expanded = glob.glob(str(repo_root / pattern), recursive=True) for abs_path in expanded: p = Path(abs_path) - if p.is_file(): - try: - rel = p.relative_to(repo_root) - except ValueError: - continue - matches.add(str(rel)) - return sorted(matches) - - -def is_git_repo(repo_root: Path) -> bool: - proc = subprocess.run( - ["git", "rev-parse", "--is-inside-work-tree"], - cwd=str(repo_root), - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - text=True, - ) - return proc.returncode == 0 - - -def collect_changed_files(repo_root: Path, mode: str, base: str, commit: str) -> list[str]: - if mode == "full_repo": - if is_git_repo(repo_root): - tracked = run_checked(["git", "ls-files"], repo_root) - others = run_checked(["git", "ls-files", "--others", "--exclude-standard"], repo_root) - return sorted( - { - line.strip() - for line in (tracked + "\n" + others).splitlines() - 
if line.strip() - } - ) - files: list[str] = [] - for abs_path in repo_root.rglob("*"): - if not abs_path.is_file(): + if not p.is_file(): continue try: - rel = abs_path.relative_to(repo_root) + rel = p.relative_to(repo_root) except ValueError: continue - if any(part.startswith(".git") for part in rel.parts): - continue - files.append(str(rel)) - return sorted(files) + matches.add(str(rel)) + return sorted(matches) + +def collect_changed_files(repo_root: Path, mode: str, base: str, commit: str) -> list[str]: if mode == "base": - cmd = ["git", "diff", "--name-only", f"{base}...HEAD"] - out = run_checked(cmd, repo_root) + out = run_checked(["git", "diff", "--name-only", f"{base}...HEAD"], repo_root) return sorted({line.strip() for line in out.splitlines() if line.strip()}) if mode == "uncommitted": out1 = run_checked(["git", "diff", "--name-only", "HEAD"], repo_root) - out2 = run_checked( - ["git", "ls-files", "--others", "--exclude-standard"], repo_root - ) - return sorted( - { - line.strip() - for line in (out1 + "\n" + out2).splitlines() - if line.strip() - } - ) + out2 = run_checked(["git", "ls-files", "--others", "--exclude-standard"], repo_root) + return sorted({line.strip() for line in (out1 + "\n" + out2).splitlines() if line.strip()}) if mode == "commit": out = run_checked(["git", "show", "--name-only", "--pretty=", commit], repo_root) return sorted({line.strip() for line in out.splitlines() if line.strip()}) @@ -996,27 +826,7 @@ def collect_changed_files(repo_root: Path, mode: str, base: str, commit: str) -> return [] -def collect_numstat( - repo_root: Path, - mode: str, - base: str, - commit: str, - *, - files_for_full_repo: list[str] | None = None, -) -> list[tuple[int, str]]: - if mode == "full_repo": - files = files_for_full_repo or collect_changed_files(repo_root, mode, base, commit) - rows: list[tuple[int, str]] = [] - for rel in files: - abs_path = repo_root / rel - try: - size = abs_path.stat().st_size - except OSError: - size = 0 - 
rows.append((size, rel)) - rows.sort(key=lambda x: x[0], reverse=True) - return rows - +def collect_numstat(repo_root: Path, mode: str, base: str, commit: str) -> list[tuple[int, str]]: if mode == "base": cmd = ["git", "diff", "--numstat", f"{base}...HEAD"] elif mode == "uncommitted": @@ -1089,7 +899,7 @@ def domain_prompt(domain: str, profile: dict[str, Any]) -> str: base = ( f"Domain focus: {domain}\n" f"- identify domain-specific correctness and policy violations for '{domain}'\n" - f"- prioritize regressions and production-risk behavior in changed code" + "- prioritize regressions and production-risk behavior in changed code" ) return base + ("\n" + custom if custom else "") @@ -1098,120 +908,6 @@ def sanitize_pass_id(value: str) -> str: return re.sub(r"[^a-zA-Z0-9_-]", "-", value.strip()).strip("-") or "pass" -def shard_files(files: list[str], max_files_per_shard: int, max_shards: int) -> list[list[str]]: - if max_files_per_shard <= 0 or len(files) <= max_files_per_shard: - return [files] - shards = [files[i : i + max_files_per_shard] for i in range(0, len(files), max_files_per_shard)] - if max_shards > 0 and len(shards) > max_shards: - keep = shards[: max_shards - 1] - tail: list[str] = [] - for shard in shards[max_shards - 1 :]: - tail.extend(shard) - keep.append(tail) - return keep - return shards - - -def build_rule_coverage_accounting( - rule_metadata: list[dict[str, Any]], - changed_files: list[str], -) -> list[dict[str, Any]]: - rows: list[dict[str, Any]] = [] - for rule in rule_metadata: - path = str(rule.get("path", "")).strip() - always_apply = bool(rule.get("always_apply")) - globs = to_string_list(rule.get("file_globs"), []) - matched_files: list[str] = [] - if globs: - for candidate in changed_files: - if any(fnmatch.fnmatch(candidate, pattern) for pattern in globs): - matched_files.append(candidate) - if always_apply: - applicability = "required" - elif globs and matched_files: - applicability = "applicable" - elif globs and not matched_files: - 
applicability = "not_applicable" - else: - applicability = "unknown_scope" - rows.append( - { - "rule_file": path, - "always_apply": always_apply, - "file_globs": globs, - "domains": to_string_list(rule.get("domains"), []), - "applicability": applicability, - "matched_files_count": len(matched_files), - "matched_files_sample": matched_files[:20], - } - ) - return rows - - -def finding_mentions_rule_file(finding_rule_text: str, rule_file: str) -> bool: - finding_text = finding_rule_text.strip().lower() - if not finding_text: - return False - normalized_rule = rule_file.strip().lower() - if normalized_rule and normalized_rule in finding_text: - return True - basename = Path(rule_file).name.strip().lower() - return bool(basename) and basename in finding_text - - -def evaluate_rule_coverage_rows( - rows: list[dict[str, Any]], - executed_passes: list[dict[str, Any]], - findings: list[dict[str, Any]], -) -> list[dict[str, Any]]: - executed_pass_ids = [str(p.get("id", "")) for p in executed_passes if str(p.get("id", "")).strip()] - - for row in rows: - rule_domains = to_string_list(row.get("domains"), []) - relevant_pass_ids: list[str] = [] - for pass_spec in executed_passes: - pass_id = str(pass_spec.get("id", "")).strip() - if not pass_id: - continue - kind = str(pass_spec.get("kind", "")).strip() - if kind == "policy": - relevant_pass_ids.append(pass_id) - continue - pass_domains = to_string_list(pass_spec.get("domains"), []) - if not rule_domains: - # Unknown domain mapping -> consider all non-domain passes as potentially relevant. 
- if kind in {"core", "depth"} or not pass_domains: - relevant_pass_ids.append(pass_id) - continue - if any(domain in pass_domains for domain in rule_domains): - relevant_pass_ids.append(pass_id) - - evidence_count = 0 - for finding in findings: - if finding_mentions_rule_file(str(finding.get("rule", "")), str(row.get("rule_file", ""))): - evidence_count += 1 - - applicability = str(row.get("applicability", "")).strip() - if applicability == "not_applicable": - coverage_status = "not_applicable_by_scope" - elif evidence_count > 0: - coverage_status = "evidenced_in_findings" - elif relevant_pass_ids: - coverage_status = "prompted_no_explicit_evidence" - elif executed_pass_ids: - coverage_status = "not_prompted_for_domain" - else: - coverage_status = "uncovered" - - row["relevant_executed_passes"] = relevant_pass_ids - row["relevant_executed_pass_count"] = len(relevant_pass_ids) - row["executed_pass_count"] = len(executed_pass_ids) - row["evidence_findings_count"] = evidence_count - row["coverage_status"] = coverage_status - - return rows - - def extract_line_number(raw: str) -> int | None: match = re.search(r"\d+", raw) if not match: @@ -1228,11 +924,8 @@ def normalize_finding_line(raw_line: str) -> str: if not line: return "" - # Allow markdown list styles: "-", "*", "1.", "1)". line = re.sub(r"^[-*+]\s*", "", line) line = re.sub(r"^\d+[.)]\s*", "", line) - - # Accept markdown-wrapped keys: **Severity:** P1, **Severity**: P1, __Type__: Bug, `Line`: 10. line = re.sub(r"^\*\*([^*]+)\*\*\s*", r"\1 ", line) line = re.sub(r"^__([^_]+)__\s*", r"\1 ", line) line = re.sub(r"^`([^`]+)`\s*", r"\1 ", line) @@ -1296,7 +989,6 @@ def parse_findings_from_pass(text: str, pass_id: str) -> list[dict[str, Any]]: elif re.match(r"(?i)^title\s*:", line): current["title"] = line.split(":", 1)[1].strip() else: - # Keep additional context attached to risk if available. 
if current.get("risk"): current["risk"] = f"{current['risk']} {line}".strip() @@ -1304,330 +996,6 @@ def parse_findings_from_pass(text: str, pass_id: str) -> list[dict[str, Any]]: return findings -def finding_fingerprint(finding: dict[str, Any]) -> str: - raw = "|".join( - [ - str(finding.get("severity", "")).upper(), - str(finding.get("type", "")).lower(), - str(finding.get("file_path", "")).lower(), - str(finding.get("line", "")), - str(finding.get("rule", "")).lower(), - str(finding.get("risk", "")).lower()[:200], - ] - ) - return hashlib.sha1(raw.encode("utf-8")).hexdigest() - - -def parse_date_yyyy_mm_dd(raw: str) -> dt.date | None: - value = raw.strip() - if not value: - return None - try: - return dt.datetime.strptime(value, "%Y-%m-%d").date() - except ValueError: - return None - - -def load_waivers(repo_root: Path, waiver_path: str) -> list[dict[str, Any]]: - path = Path(waiver_path) - if not path.is_absolute(): - path = repo_root / path - if not path.exists(): - return [] - - payload = load_yaml_or_json_any(path) - if isinstance(payload, dict): - rows = payload.get("waivers") - else: - rows = payload - if not isinstance(rows, list): - return [] - - normalized: list[dict[str, Any]] = [] - for row in rows: - if not isinstance(row, dict): - continue - normalized.append(row) - return normalized - - -def load_baseline_fingerprints(repo_root: Path, baseline_path: str) -> set[str]: - path = Path(baseline_path) - if not path.is_absolute(): - path = repo_root / path - if not path.exists(): - return set() - - payload = load_yaml_or_json_any(path) - rows: list[str] = [] - if isinstance(payload, dict): - rows = to_string_list(payload.get("fingerprints"), []) - elif isinstance(payload, list): - rows = [str(x).strip() for x in payload if str(x).strip()] - return {x for x in rows if x} - - -def waiver_matches(finding: dict[str, Any], waiver: dict[str, Any], today: dt.date) -> bool: - owner = str(waiver.get("owner", "")).strip() - reason = str(waiver.get("reason", 
"")).strip() - if not owner or not reason: - return False - - expiry_raw = str(waiver.get("expires_on", "")).strip() - if expiry_raw: - expiry = parse_date_yyyy_mm_dd(expiry_raw) - if not expiry: - return False - if expiry < today: - return False - - file_pattern = str(waiver.get("file", "")).strip() - if file_pattern and not fnmatch.fnmatch(str(finding.get("file_path", "")), file_pattern): - return False - - severity = str(waiver.get("severity", "")).strip().upper() - if severity and severity != str(finding.get("severity", "")).strip().upper(): - return False - - finding_type = str(waiver.get("type", "")).strip().lower() - if finding_type and finding_type != str(finding.get("type", "")).strip().lower(): - return False - - line_value = waiver.get("line") - if line_value is not None: - try: - expected_line = int(line_value) - except (TypeError, ValueError): - return False - if expected_line != int(finding.get("line") or 0): - return False - - contains = str(waiver.get("contains", "")).strip().lower() - if contains: - haystack = " ".join( - [ - str(finding.get("title", "")), - str(finding.get("risk", "")), - str(finding.get("fix", "")), - str(finding.get("rule", "")), - ] - ).lower() - if contains not in haystack: - return False - - return True - - -def apply_waivers_and_baseline( - findings: list[dict[str, Any]], - waivers: list[dict[str, Any]], - baseline_fingerprints: set[str], -) -> tuple[list[dict[str, Any]], list[dict[str, Any]], list[dict[str, Any]]]: - active: list[dict[str, Any]] = [] - waived: list[dict[str, Any]] = [] - baselined: list[dict[str, Any]] = [] - today = dt.datetime.utcnow().date() - - for finding in findings: - finding = dict(finding) - fp = finding_fingerprint(finding) - finding["fingerprint"] = fp - - matched_waiver: dict[str, Any] | None = None - for waiver in waivers: - if waiver_matches(finding, waiver, today): - matched_waiver = waiver - break - if matched_waiver is not None: - finding["waiver"] = { - "owner": 
str(matched_waiver.get("owner", "")).strip(), - "reason": str(matched_waiver.get("reason", "")).strip(), - "expires_on": str(matched_waiver.get("expires_on", "")).strip(), - } - finding["status"] = "waived" - waived.append(finding) - continue - - if fp in baseline_fingerprints: - finding["status"] = "baselined" - baselined.append(finding) - continue - - finding["status"] = "active" - active.append(finding) - - return active, waived, baselined - - -def findings_to_sarif(findings: list[dict[str, Any]]) -> dict[str, Any]: - level_map = {"P0": "error", "P1": "error", "P2": "warning", "P3": "note"} - rules_seen: set[str] = set() - rules: list[dict[str, Any]] = [] - results: list[dict[str, Any]] = [] - - for finding in findings: - finding_type = str(finding.get("type", "Unknown")).strip() or "Unknown" - rule_id = f"codexw/{finding_type}" - if rule_id not in rules_seen: - rules_seen.add(rule_id) - rules.append( - { - "id": rule_id, - "name": finding_type, - "shortDescription": {"text": f"Codexw finding: {finding_type}"}, - } - ) - - line = finding.get("line") - location: dict[str, Any] = { - "physicalLocation": { - "artifactLocation": {"uri": str(finding.get("file_path", ""))}, - "region": {"startLine": int(line) if isinstance(line, int) and line > 0 else 1}, - } - } - - msg_parts = [ - f"Severity {finding.get('severity', 'P2')}", - str(finding.get("risk", "")).strip(), - str(finding.get("fix", "")).strip(), - ] - message = " | ".join([part for part in msg_parts if part]) - - results.append( - { - "ruleId": rule_id, - "level": level_map.get(str(finding.get("severity", "P2")).upper(), "warning"), - "message": {"text": message or "Codexw finding"}, - "locations": [location], - } - ) - - return { - "version": "2.1.0", - "$schema": "https://json.schemastore.org/sarif-2.1.0.json", - "runs": [ - { - "tool": {"driver": {"name": "codexw", "rules": rules}}, - "results": results, - } - ], - } - - -def parse_remote_slug(repo_root: Path) -> str | None: - proc = subprocess.run( - 
["git", "remote", "get-url", "origin"], - cwd=str(repo_root), - text=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - if proc.returncode != 0: - return None - - raw = proc.stdout.strip() - if not raw: - return None - - ssh_match = re.match(r"git@[^:]+:([^/]+)/(.+?)(?:\.git)?$", raw) - if ssh_match: - return f"{ssh_match.group(1)}/{ssh_match.group(2)}" - - https_match = re.match(r"https?://[^/]+/([^/]+)/(.+?)(?:\.git)?$", raw) - if https_match: - return f"{https_match.group(1)}/{https_match.group(2)}" - return None - - -def resolve_pr_number(repo_root: Path, explicit_pr: str | None) -> str | None: - if explicit_pr: - return explicit_pr - proc = subprocess.run( - ["gh", "pr", "view", "--json", "number", "--jq", ".number"], - cwd=str(repo_root), - text=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - if proc.returncode != 0: - return None - out = proc.stdout.strip() - return out or None - - -def publish_findings_to_github( - repo_root: Path, - findings: list[dict[str, Any]], - changed_files: list[str], - combined_report: Path, - pr_number: str | None, - review_mode: str, - *, - inline: bool, -) -> None: - if not shutil_which("gh"): - print("warning: gh CLI not found; skipping GitHub publish.", file=sys.stderr) - return - - slug = parse_remote_slug(repo_root) - if not slug: - print("warning: unable to resolve GitHub owner/repo; skipping GitHub publish.", file=sys.stderr) - return - - pr = resolve_pr_number(repo_root, pr_number) - if not pr: - print("warning: unable to resolve PR number; skipping GitHub publish.", file=sys.stderr) - return - - summary_cmd = ["gh", "pr", "comment", pr, "--repo", slug, "--body-file", str(combined_report)] - summary_proc = subprocess.run(summary_cmd, cwd=str(repo_root), text=True) - if summary_proc.returncode != 0: - print("warning: failed to publish summary PR comment via gh.", file=sys.stderr) - return - - if not inline: - return - - if review_mode == "full_repo": - print( - "warning: skipping inline GitHub 
comments for --full-repo mode; " - "inline API accepts only PR diff lines.", - file=sys.stderr, - ) - return - - inline_comments: list[dict[str, Any]] = [] - changed_set = set(changed_files) - for finding in findings: - path = str(finding.get("file_path", "")).strip() - line = finding.get("line") - if not path or not isinstance(line, int) or line <= 0: - continue - if changed_set and path not in changed_set: - continue - body = ( - f"[{finding.get('severity', 'P2')}] {finding.get('type', 'Finding')} - " - f"{finding.get('risk', '')}\n" - f"Fix: {finding.get('fix', '')}" - ).strip() - inline_comments.append({"path": path, "line": line, "side": "RIGHT", "body": body[:5000]}) - if len(inline_comments) >= 30: - break - - if not inline_comments: - return - - payload = {"body": "Codexw inline findings", "event": "COMMENT", "comments": inline_comments} - proc = subprocess.run( - ["gh", "api", "--method", "POST", f"repos/{slug}/pulls/{pr}/reviews", "--input", "-"], - cwd=str(repo_root), - text=True, - input=json.dumps(payload), - ) - if proc.returncode != 0: - print("warning: failed to publish inline review comments via gh api.", file=sys.stderr) - - def run_review(args: argparse.Namespace) -> int: repo_root = find_repo_root(Path.cwd()) os.chdir(repo_root) @@ -1662,8 +1030,6 @@ def run_review(args: argparse.Namespace) -> int: if args.sync_profile_only and args.no_sync_profile: die("--sync-profile-only cannot be combined with --no-sync-profile") - if args.replace_baseline and not args.update_baseline: - die("--replace-baseline requires --update-baseline") raw_profile = load_yaml_or_json(profile_path) if args.no_sync_profile: @@ -1712,9 +1078,7 @@ def run_review(args: argparse.Namespace) -> int: mode = "base" base_branch = args.base or profile["default_base"] commit_sha = args.commit or "" - if args.full_repo: - mode = "full_repo" - elif args.uncommitted: + if args.uncommitted: mode = "uncommitted" elif args.commit: mode = "commit" @@ -1726,22 +1090,6 @@ def run_review(args: 
argparse.Namespace) -> int: fail_on_findings = False depth_hotspots = args.depth_hotspots if args.depth_hotspots is not None else profile["depth_hotspots"] - max_files_per_shard = ( - args.max_files_per_shard - if args.max_files_per_shard is not None - else profile["max_files_per_shard"] - ) - max_shards = args.max_shards if args.max_shards is not None else profile["max_shards"] - parallel_shards = ( - args.parallel_shards if args.parallel_shards is not None else profile["parallel_shards"] - ) - max_passes = args.max_passes if args.max_passes is not None else profile["max_passes"] - time_budget_minutes = ( - args.time_budget_minutes - if args.time_budget_minutes is not None - else profile["time_budget_minutes"] - ) - time_budget_seconds = max(0, time_budget_minutes) * 60 allowed_domains = profile["allowed_domains"] default_domains = profile["default_domains"] @@ -1752,10 +1100,7 @@ def run_review(args: argparse.Namespace) -> int: unknown = [d for d in selected_domains if d not in allowed_domains] if unknown: - die( - f"invalid domain(s): {', '.join(unknown)}. " - f"Allowed: {', '.join(allowed_domains)}" - ) + die(f"invalid domain(s): {', '.join(unknown)}. 
Allowed: {', '.join(allowed_domains)}") ts = dt.datetime.now().strftime("%Y%m%d-%H%M%S") output_root = Path(args.output_dir) if args.output_dir else Path(profile["output_root"]) / ts @@ -1771,11 +1116,9 @@ def run_review(args: argparse.Namespace) -> int: elif mode == "uncommitted": target_args += ["--uncommitted"] target_desc = "uncommitted changes" - elif mode == "commit": + else: target_args += ["--commit", commit_sha] target_desc = f"commit: {commit_sha}" - else: - target_desc = "full repository" if args.title: target_args += ["--title", args.title] @@ -1785,34 +1128,29 @@ def run_review(args: argparse.Namespace) -> int: target_args += ["-c", f'model="{model_override}"'] rule_files = discover_rule_files(repo_root, profile["rule_patterns"]) - rule_metadata = discover_rule_metadata(repo_root, profile["rule_patterns"]) - auto_rule_file = output_root / "enforced-rule-files.txt" - auto_rule_file.write_text("\n".join(rule_files) + ("\n" if rule_files else ""), encoding="utf-8") + (output_root / "enforced-rule-files.txt").write_text( + "\n".join(rule_files) + ("\n" if rule_files else ""), + encoding="utf-8", + ) changed_files = collect_changed_files(repo_root, mode, base_branch, commit_sha) - changed_files_file = output_root / "changed-files.txt" - changed_files_file.write_text( - "\n".join(changed_files) + ("\n" if changed_files else ""), encoding="utf-8" + (output_root / "changed-files.txt").write_text( + "\n".join(changed_files) + ("\n" if changed_files else ""), + encoding="utf-8", ) modules = changed_modules(changed_files) - changed_modules_file = output_root / "changed-modules.txt" - changed_modules_file.write_text( - "\n".join([f"{count}\t{module}" for count, module in modules]) - + ("\n" if modules else ""), + (output_root / "changed-modules.txt").write_text( + "\n".join([f"{count}\t{module}" for count, module in modules]) + ("\n" if modules else ""), encoding="utf-8", ) - numstat = collect_numstat( - repo_root, - mode, - base_branch, - commit_sha, - 
files_for_full_repo=changed_files, - ) + numstat = collect_numstat(repo_root, mode, base_branch, commit_sha) hotspots = [path for _, path in numstat[: depth_hotspots if depth_hotspots > 0 else 0]] - hotspots_file = output_root / "hotspots.txt" - hotspots_file.write_text("\n".join(hotspots) + ("\n" if hotspots else ""), encoding="utf-8") + (output_root / "hotspots.txt").write_text( + "\n".join(hotspots) + ("\n" if hotspots else ""), + encoding="utf-8", + ) if not changed_files: combined_report = output_root / "combined-report.md" @@ -1837,8 +1175,6 @@ def run_review(args: argparse.Namespace) -> int: print(f"Combined report: {combined_report}") return 0 - rule_coverage_rows = build_rule_coverage_accounting(rule_metadata, changed_files) - base_rubric = ( f"Act as a strict PR gate reviewer for {profile['repo_name']}.\n" "Return only actionable findings.\n\n" @@ -1859,77 +1195,46 @@ def run_review(args: argparse.Namespace) -> int: ) global_prompt = profile.get("global_prompt", "") - full_diff_context = build_diff_context(changed_files, modules, hotspots) + diff_context = build_diff_context(changed_files, modules, hotspots) rules_block = rule_block(rule_files) - def pass_prompt(extra: str, *, context_override: str | None = None) -> str: - parts = [base_rubric, rules_block, context_override or full_diff_context] + def pass_prompt(extra: str) -> str: + parts = [base_rubric, rules_block, diff_context] if global_prompt: parts.append("Profile global context:\n" + global_prompt) parts.append(extra) return "\n\n".join([p for p in parts if p.strip()]) pipeline = profile["pipeline"] - passes: list[dict[str, Any]] = [] + passes: list[tuple[str, str, str]] = [] pass_counter = 0 if pipeline.get("include_policy_pass", True): - pass_counter += 1 - passes.append( - { - "id": f"pass-{pass_counter}-policy-sweep", - "name": "Policy: full standards coverage sweep", - "prompt": pass_prompt(str(pipeline.get("policy_instructions", ""))), - "parallel_group": "", - "kind": "policy", - 
"domains": list(selected_domains), - } + pass_counter += 1 + passes.append( + ( + f"pass-{pass_counter}-policy-sweep", + "Policy: full standards coverage sweep", + pass_prompt(str(pipeline.get("policy_instructions", ""))), ) + ) if pipeline.get("include_core_passes", True) and "core" in selected_domains: core_passes = pipeline.get("core_passes") or [] - shards = shard_files(changed_files, max_files_per_shard, max_shards) for core_pass in core_passes: pass_id = sanitize_pass_id(str(core_pass.get("id", "core-pass"))) pass_name = str(core_pass.get("name", pass_id)).strip() or pass_id instructions = str(core_pass.get("instructions", "")).strip() if not instructions: continue - shard_mode = str(core_pass.get("shard", "none")).strip().lower() - if shard_mode == "changed_files" and len(shards) > 1: - for shard_index, shard in enumerate(shards, start=1): - shard_modules = changed_modules(shard) - shard_hotspots = [h for h in hotspots if h in set(shard)] - shard_context = build_diff_context(shard, shard_modules, shard_hotspots) - pass_counter += 1 - passes.append( - { - "id": f"pass-{pass_counter}-{pass_id}-shard-{shard_index}", - "name": f"{pass_name} (shard {shard_index}/{len(shards)})", - "prompt": pass_prompt( - "Shard scope:\n" - f"- shard index: {shard_index}/{len(shards)}\n" - "- Review only files in this shard section, while using full repo context for dependencies.\n\n" - + instructions, - context_override=shard_context, - ), - "parallel_group": f"core-{pass_id}-shards", - "kind": "core", - "domains": ["core"], - } - ) - else: - pass_counter += 1 - passes.append( - { - "id": f"pass-{pass_counter}-{pass_id}", - "name": pass_name, - "prompt": pass_prompt(instructions), - "parallel_group": "", - "kind": "core", - "domains": ["core"], - } + pass_counter += 1 + passes.append( + ( + f"pass-{pass_counter}-{pass_id}", + pass_name, + pass_prompt(instructions), ) + ) if pipeline.get("include_domain_passes", True): for domain in selected_domains: @@ -1938,14 +1243,11 @@ def 
run_review(args: argparse.Namespace) -> int: pass_counter += 1 slug = sanitize_pass_id(domain) passes.append( - { - "id": f"pass-{pass_counter}-domain-{slug}", - "name": f"Domain: {domain}", - "prompt": pass_prompt(domain_prompt(domain, profile)), - "parallel_group": "", - "kind": "domain", - "domains": [domain], - } + ( + f"pass-{pass_counter}-domain-{slug}", + f"Domain: {domain}", + pass_prompt(domain_prompt(domain, profile)), + ) ) if pipeline.get("include_depth_passes", True): @@ -1958,29 +1260,22 @@ def run_review(args: argparse.Namespace) -> int: except Exception: depth_instructions = DEFAULT_DEPTH_PASS_INSTRUCTIONS.format(hotspot=hotspot) passes.append( - { - "id": f"pass-{pass_counter}-depth-{hotspot_slug}", - "name": f"Depth hotspot: {hotspot}", - "prompt": pass_prompt(depth_instructions), - "parallel_group": "", - "kind": "depth", - "domains": list(selected_domains), - } + ( + f"pass-{pass_counter}-depth-{hotspot_slug}", + f"Depth hotspot: {hotspot}", + pass_prompt(depth_instructions), + ) ) if not passes: die("no review passes configured; check profile.pipeline settings") - run_started = time.monotonic() - executed_passes: dict[str, dict[str, Any]] = {} - skipped_passes: dict[str, str] = {} + summary_lines: list[str] = [] + raw_findings: list[dict[str, Any]] = [] - def run_one_pass(pass_spec: dict[str, Any], ordinal: int, total: int, *, stream_output: bool) -> dict[str, Any]: - pass_id = str(pass_spec["id"]) - pass_name = str(pass_spec["name"]) - prompt = str(pass_spec["prompt"]) + for index, (pass_id, pass_name, prompt) in enumerate(passes, start=1): out_file = output_root / f"{pass_id}.md" - print(f"\n==> ({ordinal}/{total}) {pass_name}") + print(f"\n==> ({index}/{len(passes)}) {pass_name}") run_review_pass_with_compat( repo_root=repo_root, out_file=out_file, @@ -1988,8 +1283,8 @@ def run_review(args: argparse.Namespace) -> int: target_desc=target_desc, prompt=prompt, pass_name=pass_name, - stream_output=stream_output, ) + text = 
out_file.read_text(encoding="utf-8", errors="replace") parsed = parse_findings_from_pass(text, pass_id) no_findings = pass_has_no_findings(text, parsed) @@ -2008,169 +1303,31 @@ def run_review(args: argparse.Namespace) -> int: "pass_id": pass_id, } ] - return { - "id": pass_id, - "name": pass_name, - "out_file": str(out_file), - "parsed_findings": parsed, - "no_findings": no_findings, - } - - index = 0 - executed_count = 0 - while index < len(passes): - if max_passes > 0 and executed_count >= max_passes: - for pass_spec in passes[index:]: - skipped_passes[str(pass_spec["id"])] = "max_passes" - break - - elapsed = time.monotonic() - run_started - if time_budget_seconds > 0 and elapsed >= time_budget_seconds: - for pass_spec in passes[index:]: - skipped_passes[str(pass_spec["id"])] = "time_budget" - break - - current = passes[index] - group = str(current.get("parallel_group", "")).strip() - if group and parallel_shards > 1: - batch: list[dict[str, Any]] = [] - cursor = index - while cursor < len(passes): - candidate = passes[cursor] - if str(candidate.get("parallel_group", "")).strip() != group: - break - batch.append(candidate) - cursor += 1 - - if max_passes > 0: - remaining = max_passes - executed_count - if remaining <= 0: - for pass_spec in batch: - skipped_passes[str(pass_spec["id"])] = "max_passes" - index = cursor - continue - if len(batch) > remaining: - for pass_spec in batch[remaining:]: - skipped_passes[str(pass_spec["id"])] = "max_passes" - batch = batch[:remaining] - - with ThreadPoolExecutor(max_workers=parallel_shards) as executor: - future_map = { - executor.submit( - run_one_pass, - pass_spec, - (index + offset + 1), - len(passes), - stream_output=False, - ): pass_spec - for offset, pass_spec in enumerate(batch) - } - for future in as_completed(future_map): - result = future.result() - executed_passes[result["id"]] = result - executed_count += 1 - - index = cursor - continue - - result = run_one_pass(current, index + 1, len(passes), 
stream_output=True) - executed_passes[result["id"]] = result - executed_count += 1 - index += 1 - summary_lines: list[str] = [] - raw_findings: list[dict[str, Any]] = [] - executed_pass_ids = [p["id"] for p in passes if p["id"] in executed_passes] - for pass_spec in passes: - pass_id = str(pass_spec["id"]) - pass_name = str(pass_spec["name"]) - if pass_id in executed_passes: - result = executed_passes[pass_id] - if result["no_findings"]: - summary_lines.append(f"- [PASS] {pass_name}") - else: - summary_lines.append(f"- [FINDINGS] {pass_name}") - raw_findings.extend(result["parsed_findings"]) + if no_findings: + summary_lines.append(f"- [PASS] {pass_name}") else: - reason = skipped_passes.get(pass_id, "skipped") - summary_lines.append(f"- [SKIPPED:{reason}] {pass_name}") - - executed_pass_specs = [spec for spec in passes if str(spec.get("id", "")) in set(executed_pass_ids)] - rule_coverage_rows = evaluate_rule_coverage_rows( - rows=rule_coverage_rows, - executed_passes=executed_pass_specs, - findings=raw_findings, - ) - - rule_coverage_json = output_root / "rule-coverage-accounting.json" - rule_coverage_json.write_text(json.dumps(rule_coverage_rows, indent=2) + "\n", encoding="utf-8") - rule_coverage_md = output_root / "rule-coverage-accounting.md" - with rule_coverage_md.open("w", encoding="utf-8") as fh: - fh.write("# Rule Coverage Accounting\n\n") - for row in rule_coverage_rows: - fh.write( - f"- {row['rule_file']} :: applicability={row['applicability']} " - f":: status={row['coverage_status']} " - f":: relevant_passes={row['relevant_executed_pass_count']} " - f":: evidence_findings={row['evidence_findings_count']} " - f":: matched_files={row['matched_files_count']}\n" - ) + summary_lines.append(f"- [FINDINGS] {pass_name}") + raw_findings.extend(parsed) - waiver_file = str(args.waiver_file or profile["waiver_file"]).strip() or DEFAULT_WAIVER_FILE - baseline_file = str(args.baseline_file or profile["baseline_file"]).strip() or DEFAULT_BASELINE_FILE - waivers = 
load_waivers(repo_root, waiver_file) - baseline_fingerprints = load_baseline_fingerprints(repo_root, baseline_file) - - active_findings, waived_findings, baselined_findings = apply_waivers_and_baseline( - raw_findings, - waivers, - baseline_fingerprints, - ) - - if args.update_baseline: - baseline_path = Path(baseline_file) - if not baseline_path.is_absolute(): - baseline_path = repo_root / baseline_path - baseline_path.parent.mkdir(parents=True, exist_ok=True) - new_fingerprint_set = {finding["fingerprint"] for finding in active_findings} - if args.replace_baseline: - final_fingerprints = sorted(new_fingerprint_set) - baseline_mode = "replace" - else: - final_fingerprints = sorted(set(baseline_fingerprints) | new_fingerprint_set) - baseline_mode = "merge" - baseline_payload = { - "updated_utc": dt.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"), - "mode": baseline_mode, - "fingerprints": final_fingerprints, - } - baseline_path.write_text(json.dumps(baseline_payload, indent=2) + "\n", encoding="utf-8") - print( - f"Updated baseline file ({baseline_mode}): {baseline_path} " - f"[count={len(final_fingerprints)}]" - ) + (output_root / "pass-status.md").write_text("\n".join(summary_lines) + "\n", encoding="utf-8") findings_json = output_root / "findings.json" - findings_payload = { - "generated_utc": dt.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"), - "target": target_desc, - "counts": { - "raw": len(raw_findings), - "active": len(active_findings), - "waived": len(waived_findings), - "baselined": len(baselined_findings), - }, - "active_findings": active_findings, - "waived_findings": waived_findings, - "baselined_findings": baselined_findings, - } - findings_json.write_text(json.dumps(findings_payload, indent=2) + "\n", encoding="utf-8") - - sarif_file = output_root / "findings.sarif" - sarif_file.write_text(json.dumps(findings_to_sarif(active_findings), indent=2) + "\n", encoding="utf-8") - - summary_file = output_root / "pass-status.md" - 
summary_file.write_text("\n".join(summary_lines) + "\n", encoding="utf-8") + findings_json.write_text( + json.dumps( + { + "generated_utc": dt.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"), + "target": target_desc, + "counts": { + "active": len(raw_findings), + }, + "active_findings": raw_findings, + }, + indent=2, + ) + + "\n", + encoding="utf-8", + ) combined_report = output_root / "combined-report.md" with combined_report.open("w", encoding="utf-8") as fh: @@ -2187,30 +1344,20 @@ def run_review(args: argparse.Namespace) -> int: fh.write(f"- Auto-enforced rule files: {len(rule_files)}\n") fh.write(f"- Changed files: {len(changed_files)}\n") fh.write(f"- Depth hotspots: {depth_hotspots}\n") - fh.write(f"- Pass count (planned/executed): {len(passes)}/{len(executed_pass_ids)}\n") - fh.write(f"- Waiver file: {waiver_file}\n") - fh.write(f"- Baseline file: {baseline_file}\n") if args.title: fh.write(f"- Title: {args.title}\n") if model_override: fh.write(f"- Model override: {model_override}\n") + fh.write(f"- Pass count: {len(passes)}\n") fh.write(f"- Profile file: {profile_display}\n\n") fh.write("## Findings Summary\n\n") - fh.write(f"- Raw findings: {len(raw_findings)}\n") - fh.write(f"- Active findings: {len(active_findings)}\n") - fh.write(f"- Waived findings: {len(waived_findings)}\n") - fh.write(f"- Baselined findings: {len(baselined_findings)}\n") - fh.write(f"- JSON artifact: {findings_json}\n") - fh.write(f"- SARIF artifact: {sarif_file}\n\n") + fh.write(f"- Active findings: {len(raw_findings)}\n") + fh.write(f"- JSON artifact: {findings_json}\n\n") fh.write("## Pass Status\n\n") fh.write("\n".join(summary_lines) + "\n\n") - fh.write("## Rule Coverage Accounting\n\n") - fh.write(f"- JSON: {rule_coverage_json}\n") - fh.write(f"- Markdown: {rule_coverage_md}\n\n") - fh.write("## Auto-Enforced Rule Files\n\n") if rule_files: fh.write("\n".join(rule_files) + "\n\n") @@ -2237,22 +1384,11 @@ def run_review(args: argparse.Namespace) -> int: fh.write("\n") 
fh.write("\n") - if args.publish_github: - publish_findings_to_github( - repo_root=repo_root, - findings=active_findings, - changed_files=changed_files, - combined_report=combined_report, - pr_number=args.github_pr, - review_mode=mode, - inline=args.github_inline, - ) - print("\nDone.") print(f"Per-pass outputs: {output_root}") print(f"Combined report: {combined_report}") - if active_findings: + if raw_findings: print("Status: active findings detected.") if fail_on_findings: print("Exiting non-zero because fail-on-findings is enabled.", file=sys.stderr) @@ -2294,41 +1430,11 @@ def build_parser() -> argparse.ArgumentParser: mode.add_argument("--base", help="Base branch", default=None) mode.add_argument("--uncommitted", action="store_true", help="Review uncommitted changes") mode.add_argument("--commit", help="Review a specific commit SHA", default=None) - mode.add_argument("--full-repo", action="store_true", help="Review full repository files") target_parser.add_argument("--domains", help="Comma-separated domain list", default=None) target_parser.add_argument("--depth-hotspots", type=int, help="Number of hotspot depth passes") - target_parser.add_argument( - "--max-files-per-shard", - type=int, - help="Shard breadth passes when changed files exceed this count (0 disables)", - ) - target_parser.add_argument("--max-shards", type=int, help="Max shards for breadth passes") - target_parser.add_argument( - "--parallel-shards", - type=int, - help="Execute shard passes in parallel with this worker count", - ) - target_parser.add_argument("--max-passes", type=int, help="Hard cap on executed passes") - target_parser.add_argument( - "--time-budget-minutes", - type=int, - help="Stop scheduling new passes after this runtime budget", - ) target_parser.add_argument("--title", help="Optional review title", default=None) target_parser.add_argument("--output-dir", help="Output directory for artifacts", default=None) target_parser.add_argument("--model", help="Optional model 
override", default=None) - target_parser.add_argument("--waiver-file", help="Override waiver config path", default=None) - target_parser.add_argument("--baseline-file", help="Override baseline file path", default=None) - target_parser.add_argument( - "--update-baseline", - action="store_true", - help="Update baseline file from active findings (merge by default)", - ) - target_parser.add_argument( - "--replace-baseline", - action="store_true", - help="When used with --update-baseline, replace baseline instead of merge", - ) target_parser.add_argument( "--print-effective-profile", action="store_true", @@ -2357,7 +1463,7 @@ def build_parser() -> argparse.ArgumentParser: target_parser.add_argument( "--no-prune-autogen", action="store_true", - help="Keep previously auto-managed entries even if no longer inferred", + help="Keep stale auto-managed profile entries for this run", ) target_parser.add_argument("--fail-on-findings", action="store_true", help="Force strict gate") target_parser.add_argument( @@ -2365,21 +1471,6 @@ def build_parser() -> argparse.ArgumentParser: action="store_true", help="Exploratory mode; do not fail when findings exist", ) - target_parser.add_argument( - "--publish-github", - action="store_true", - help="Publish review summary to current GitHub PR using gh CLI", - ) - target_parser.add_argument( - "--github-pr", - help="Explicit GitHub PR number for publish step", - default=None, - ) - target_parser.add_argument( - "--github-inline", - action="store_true", - help="Also publish inline PR review comments when line/path are available", - ) add_review_args(review) add_review_args(review_pr) diff --git a/local-review-profile.example.yaml b/local-review-profile.example.yaml index 1bd7e06..2c96427 100644 --- a/local-review-profile.example.yaml +++ b/local-review-profile.example.yaml @@ -8,13 +8,6 @@ review: strict_gate: true depth_hotspots: 3 output_root: .codex/review-runs - max_files_per_shard: 40 - max_shards: 5 - parallel_shards: 1 - max_passes: 0 
- time_budget_minutes: 0 - waiver_file: .codex/review-waivers.yaml - baseline_file: .codex/review-baseline.json rules: include: @@ -56,21 +49,17 @@ pipeline: core_passes: - id: core-breadth name: Core breadth coverage - shard: changed_files instructions: | Review every changed file in this shard and report actionable findings only. - id: core-regressions name: Core regressions - shard: none instructions: | Focus on behavioral regressions, crash/nullability, and security/privacy issues. - id: core-architecture name: Core architecture - shard: none instructions: | Focus on architecture boundaries, concurrency, and lifecycle correctness. - id: core-tests name: Core tests - shard: none instructions: | Focus on missing tests and high-risk edge cases without coverage. From 996e0dcd906e27396d6b571919ff93fcf0c051bf Mon Sep 17 00:00:00 2001 From: pavan Date: Fri, 20 Feb 2026 15:40:31 +0530 Subject: [PATCH 3/7] Harden codexw fallback YAML parsing and update PR-grade hook docs --- .pre-commit-hooks.yaml | 17 - Makefile | 2 + README.md | 10 +- codexw | 581 +++++++++++++++++++++++++++++- test/codexw_fallback_yaml_test.py | 169 +++++++++ 5 files changed, 748 insertions(+), 31 deletions(-) create mode 100644 test/codexw_fallback_yaml_test.py diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 0e209bf..3f7f3aa 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -35,15 +35,6 @@ stages: [manual] verbose: true -- id: codexw - name: Codex AI Code Review (PR-grade, codexw) - description: Alias for codex-review-pr-grade. Run with: pre-commit run codexw - entry: ./codexw review - language: script - pass_filenames: false - stages: [manual] - verbose: true - # Nobody should ever use these hooks in production. They're just for testing PRs in # the duolingo/pre-commit-hooks repo more easily without having to tag and push # temporary images to Docker Hub. 
Usage: edit a consumer repo's hook config to @@ -77,11 +68,3 @@ pass_filenames: false stages: [manual] verbose: true - -- id: codexw-dev - name: Codex AI Code Review (PR-grade, codexw dev) - entry: ./codexw review - language: script - pass_filenames: false - stages: [manual] - verbose: true diff --git a/Makefile b/Makefile index 103d265..b0c9f03 100644 --- a/Makefile +++ b/Makefile @@ -48,6 +48,8 @@ shell: # Runs tests .PHONY: test test: + echo "Running codexw fallback parser tests..." + python3 test/codexw_fallback_yaml_test.py docker run --rm -v "$${PWD}/test:/test" "$$(docker build --network=host -q .)" sh -c \ 'cd /tmp \ && cp -r /test/before actual \ diff --git a/README.md b/README.md index 3825573..3932f4f 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ codex review --uncommitted codex review --base master ``` -## Codex PR-grade Hook (`codexw`) +## Codex PR-grade Hook (`codex-review-pr-grade`) Profile-aware multi-pass local review using `codexw`. This hook is also `manual` by default and does not block normal commits. 
@@ -95,10 +95,10 @@ PR-grade outputs include: **Usage:** ```bash # Run PR-grade review for current diff vs profile default base branch -pre-commit run codexw +pre-commit run codex-review-pr-grade # Run PR-grade review for all files (still uses profile + pass orchestration) -pre-commit run codexw --all-files +pre-commit run codex-review-pr-grade --all-files ``` Direct execution (without pre-commit): @@ -167,7 +167,7 @@ pipeline: Reference profile: `local-review-profile.example.yaml` -Backward-compatible hook id alias is available: +Hook id for pre-commit: `codex-review-pr-grade` ## Usage @@ -192,7 +192,7 @@ Repo maintainers can declare these hooks in `.pre-commit-config.yaml`: # On-demand Codex AI code review (manual stage, requires codex CLI) - id: codex-review # On-demand PR-grade Codex review (manual stage, profile-aware) - - id: codexw + - id: codex-review-pr-grade ``` Directories named `build` and `node_modules` are excluded by default - no need to declare them in the hook's `exclude` key. 
diff --git a/codexw b/codexw index 657c68e..52518ea 100755 --- a/codexw +++ b/codexw @@ -462,9 +462,530 @@ def build_bootstrap_profile(repo_root: Path) -> dict[str, Any]: } +def _yaml_plain_scalar_allowed(value: str) -> bool: + if not value or value.strip() != value: + return False + if any(ch in value for ch in ":#{}[]&,*!?|>'\"%@`"): + return False + if value[0] in "-?:!&*@`": + return False + if "\n" in value or "\r" in value or "\t" in value: + return False + lowered = value.lower() + if lowered in {"true", "false", "null", "~", "yes", "no", "on", "off"}: + return False + if re.fullmatch(r"[+-]?\d+(?:\.\d+)?", value): + return False + return True + + +def _yaml_inline_scalar(value: Any) -> str: + if value is None: + return "null" + if isinstance(value, bool): + return "true" if value else "false" + if isinstance(value, (int, float)): + return str(value) + text = str(value) + if _yaml_plain_scalar_allowed(text): + return text + return json.dumps(text) + + +def _yaml_emit(value: Any, indent: int = 0) -> list[str]: + pad = " " * indent + + if isinstance(value, dict): + if not value: + return [pad + "{}"] + lines: list[str] = [] + for key, raw_val in value.items(): + key_text = str(key) + if isinstance(raw_val, str) and "\n" in raw_val: + lines.append(f"{pad}{key_text}: |") + for line in raw_val.splitlines(): + lines.append(" " * (indent + 2) + line) + continue + if isinstance(raw_val, dict): + if raw_val: + lines.append(f"{pad}{key_text}:") + lines.extend(_yaml_emit(raw_val, indent + 2)) + else: + lines.append(f"{pad}{key_text}: {{}}") + continue + if isinstance(raw_val, list): + if raw_val: + lines.append(f"{pad}{key_text}:") + lines.extend(_yaml_emit(raw_val, indent + 2)) + else: + lines.append(f"{pad}{key_text}: []") + continue + lines.append(f"{pad}{key_text}: {_yaml_inline_scalar(raw_val)}") + return lines + + if isinstance(value, list): + if not value: + return [pad + "[]"] + lines: list[str] = [] + for item in value: + if isinstance(item, str) and "\n" in 
item: + lines.append(f"{pad}- |") + for line in item.splitlines(): + lines.append(" " * (indent + 2) + line) + continue + if isinstance(item, dict): + if not item: + lines.append(f"{pad}- {{}}") + else: + lines.append(f"{pad}-") + lines.extend(_yaml_emit(item, indent + 2)) + continue + if isinstance(item, list): + if not item: + lines.append(f"{pad}- []") + else: + lines.append(f"{pad}-") + lines.extend(_yaml_emit(item, indent + 2)) + continue + lines.append(f"{pad}- {_yaml_inline_scalar(item)}") + return lines + + return [pad + _yaml_inline_scalar(value)] + + +def _dump_yaml_text(value: Any) -> str: + return "\n".join(_yaml_emit(value)).rstrip() + "\n" + + def write_profile(path: Path, profile: dict[str, Any]) -> None: path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(json.dumps(profile, indent=2) + "\n", encoding="utf-8") + path.write_text(_dump_yaml_text(profile), encoding="utf-8") + + +def _is_closed_quoted_scalar(text: str) -> bool: + stripped = text.strip() + if len(stripped) < 2: + return False + + if stripped[0] == "'" and stripped[-1] == "'": + idx = 1 + while idx < len(stripped) - 1: + if stripped[idx] == "'": + if idx + 1 < len(stripped) and stripped[idx + 1] == "'": + idx += 2 + continue + return False + idx += 1 + return True + + if stripped[0] == '"' and stripped[-1] == '"': + escaped = False + for ch in stripped[1:-1]: + if escaped: + escaped = False + elif ch == "\\": + escaped = True + elif ch == '"': + return False + return not escaped + + return False + + +def _is_closed_flow_collection(text: str) -> bool: + stripped = text.strip() + if len(stripped) < 2: + return False + if stripped[0] not in "[{": + return False + expected_end = "]" if stripped[0] == "[" else "}" + if stripped[-1] != expected_end: + return False + + depth = 0 + in_single = False + in_double = False + escaped = False + idx = 0 + while idx < len(stripped): + ch = stripped[idx] + if in_double: + if escaped: + escaped = False + elif ch == "\\": + escaped = True + 
elif ch == '"': + in_double = False + idx += 1 + continue + + if in_single: + if ch == "'": + if idx + 1 < len(stripped) and stripped[idx + 1] == "'": + idx += 2 + continue + in_single = False + idx += 1 + continue + + if ch == '"': + in_double = True + idx += 1 + continue + if ch == "'": + in_single = True + idx += 1 + continue + + if ch in "[{": + depth += 1 + elif ch in "]}": + depth -= 1 + if depth < 0: + return False + idx += 1 + + return depth == 0 and not in_single and not in_double and not escaped + + +def _strip_yaml_inline_comment(raw: str) -> str: + text = raw.rstrip() + in_single = False + in_double = False + escaped = False + idx = 0 + while idx < len(text): + ch = text[idx] + if in_double: + if escaped: + escaped = False + elif ch == "\\": + escaped = True + elif ch == '"': + in_double = False + idx += 1 + continue + + if in_single: + if ch == "'": + # YAML single-quote escape: doubled apostrophe. + if idx + 1 < len(text) and text[idx + 1] == "'": + idx += 2 + continue + in_single = False + idx += 1 + continue + + if ch == '"': + in_double = True + elif ch == "'": + in_single = True + elif ch == "#": + prefix = text[:idx].rstrip() + if ( + idx == 0 + or text[idx - 1].isspace() + or _is_closed_quoted_scalar(prefix) + or _is_closed_flow_collection(prefix) + ): + return text[:idx].rstrip() + idx += 1 + return text + + +def _split_yaml_flow_items(raw: str) -> list[str]: + items: list[str] = [] + buf: list[str] = [] + in_single = False + in_double = False + escaped = False + depth = 0 + + idx = 0 + while idx < len(raw): + ch = raw[idx] + if in_double: + buf.append(ch) + if escaped: + escaped = False + elif ch == "\\": + escaped = True + elif ch == '"': + in_double = False + idx += 1 + continue + + if in_single: + buf.append(ch) + if ch == "'": + if idx + 1 < len(raw) and raw[idx + 1] == "'": + buf.append(raw[idx + 1]) + idx += 2 + continue + in_single = False + idx += 1 + continue + + if ch == '"': + in_double = True + buf.append(ch) + idx += 1 + continue 
+ if ch == "'": + in_single = True + buf.append(ch) + idx += 1 + continue + + if ch in "[{(": + depth += 1 + buf.append(ch) + idx += 1 + continue + if ch in "]})": + if depth > 0: + depth -= 1 + buf.append(ch) + idx += 1 + continue + + if ch == "," and depth == 0: + items.append("".join(buf).strip()) + buf = [] + idx += 1 + continue + + buf.append(ch) + idx += 1 + + tail = "".join(buf).strip() + if tail or raw.strip(): + items.append(tail) + return [item for item in items if item != ""] + + +def _parse_simple_yaml_scalar(raw: str) -> Any: + token = _strip_yaml_inline_comment(raw).strip() + if token == "": + return "" + lowered = token.lower() + if lowered == "true": + return True + if lowered == "false": + return False + if lowered in {"null", "~"}: + return None + if token == "{}": + return {} + if token == "[]": + return [] + if token.startswith("[") and token.endswith("]"): + inner = token[1:-1].strip() + if not inner: + return [] + return [_parse_simple_yaml_scalar(item) for item in _split_yaml_flow_items(inner)] + if token.startswith("{") and token.endswith("}"): + inner = token[1:-1].strip() + if not inner: + return {} + out: dict[str, Any] = {} + for item in _split_yaml_flow_items(inner): + if ":" not in item: + return token + key_raw, value_raw = item.split(":", 1) + key = _parse_simple_yaml_scalar(key_raw) + out[str(key)] = _parse_simple_yaml_scalar(value_raw) + return out + if re.fullmatch(r"[+-]?\d+", token): + try: + return int(token) + except ValueError: + return token + if re.fullmatch(r"[+-]?\d+\.\d+", token): + try: + return float(token) + except ValueError: + return token + if token.startswith('"') and token.endswith('"'): + try: + return json.loads(token) + except json.JSONDecodeError: + return token[1:-1] + if token.startswith("'") and token.endswith("'") and len(token) >= 2: + return token[1:-1].replace("''", "'") + return token + + +class _SimpleYamlParser: + def __init__(self, text: str) -> None: + self.lines = text.splitlines() + self.index = 
0 + + @staticmethod + def _indent(line: str) -> int: + return len(line) - len(line.lstrip(" ")) + + @staticmethod + def _is_ignorable(line: str) -> bool: + stripped = line.strip() + return not stripped or stripped.startswith("#") or stripped in {"---", "..."} + + def _skip_ignorable(self) -> None: + while self.index < len(self.lines) and self._is_ignorable(self.lines[self.index]): + self.index += 1 + + def parse(self) -> Any: + self._skip_ignorable() + if self.index >= len(self.lines): + return {} + start_indent = self._indent(self.lines[self.index]) + value = self._parse_block(start_indent) + self._skip_ignorable() + if self.index < len(self.lines): + raise ValueError(f"unexpected trailing content near line {self.index + 1}") + return value + + def _parse_block(self, indent: int) -> Any: + self._skip_ignorable() + if self.index >= len(self.lines): + return {} + + cur_indent = self._indent(self.lines[self.index]) + if cur_indent < indent: + return {} + if cur_indent > indent: + indent = cur_indent + + content = self.lines[self.index][indent:] + if content == "-" or content.startswith("- "): + return self._parse_list(indent) + return self._parse_map(indent) + + def _parse_map(self, indent: int) -> dict[str, Any]: + out: dict[str, Any] = {} + while True: + self._skip_ignorable() + if self.index >= len(self.lines): + break + + line = self.lines[self.index] + cur_indent = self._indent(line) + if cur_indent < indent: + break + if cur_indent > indent: + raise ValueError(f"unexpected indentation at line {self.index + 1}") + + content = line[indent:] + if content == "-" or content.startswith("- "): + break + if ":" not in content: + raise ValueError(f"invalid mapping entry at line {self.index + 1}") + + key, raw_rest = content.split(":", 1) + key = key.strip() + rest = _strip_yaml_inline_comment(raw_rest).strip() + self.index += 1 + + if not key: + raise ValueError(f"empty mapping key at line {self.index}") + + if rest in {"|", "|-", ">", ">-"}: + out[key] = 
self._parse_block_scalar(indent + 2) + elif rest == "": + out[key] = self._parse_nested(indent + 2) + else: + out[key] = _parse_simple_yaml_scalar(rest) + + return out + + def _parse_nested(self, expected_indent: int) -> Any: + self._skip_ignorable() + if self.index >= len(self.lines): + return None + + cur_indent = self._indent(self.lines[self.index]) + if cur_indent < expected_indent: + return None + if cur_indent > expected_indent: + expected_indent = cur_indent + + content = self.lines[self.index][expected_indent:] + if content == "-" or content.startswith("- "): + return self._parse_list(expected_indent) + return self._parse_map(expected_indent) + + def _parse_list(self, indent: int) -> list[Any]: + out: list[Any] = [] + while True: + self._skip_ignorable() + if self.index >= len(self.lines): + break + + line = self.lines[self.index] + cur_indent = self._indent(line) + if cur_indent < indent: + break + if cur_indent > indent: + raise ValueError(f"unexpected indentation at line {self.index + 1}") + + content = line[indent:] + if not (content == "-" or content.startswith("- ")): + break + + rest = "" if content == "-" else _strip_yaml_inline_comment(content[2:]).strip() + self.index += 1 + + if rest in {"|", "|-", ">", ">-"}: + out.append(self._parse_block_scalar(indent + 2)) + continue + + if rest == "": + out.append(self._parse_nested(indent + 2)) + continue + + inline_map_match = re.match(r"^([A-Za-z0-9_.-]+):(?:\s+|$)(.*)$", rest) + if inline_map_match: + key = inline_map_match.group(1).strip() + tail = _strip_yaml_inline_comment(inline_map_match.group(2)).strip() + item: dict[str, Any] = {} + if tail in {"|", "|-", ">", ">-"}: + item[key] = self._parse_block_scalar(indent + 4) + elif tail == "": + item[key] = self._parse_nested(indent + 4) + else: + item[key] = _parse_simple_yaml_scalar(tail) + for extra_key, extra_val in self._parse_map(indent + 2).items(): + item[extra_key] = extra_val + out.append(item) + continue + + 
out.append(_parse_simple_yaml_scalar(rest)) + + return out + + def _parse_block_scalar(self, indent: int) -> str: + lines: list[str] = [] + while self.index < len(self.lines): + raw = self.lines[self.index] + if raw.strip() == "": + lines.append("") + self.index += 1 + continue + + cur_indent = self._indent(raw) + if cur_indent < indent: + break + + lines.append(raw[indent:]) + self.index += 1 + + while lines and lines[-1] == "": + lines.pop() + return "\n".join(lines) + + +def _parse_simple_yaml(text: str) -> Any: + return _SimpleYamlParser(text).parse() def _stable(obj: Any) -> str: @@ -650,10 +1171,14 @@ def load_yaml_or_json(path: Path) -> dict[str, Any]: try: data = json.loads(text) except json.JSONDecodeError: - die( - "PyYAML not available and profile is not valid JSON-compatible YAML. " - "Install PyYAML (python3 -m pip install pyyaml) or provide JSON syntax." - ) + try: + data = _parse_simple_yaml(text) + except ValueError as exc: + die( + "PyYAML not available and profile parsing failed. " + "Install PyYAML (python3 -m pip install pyyaml) or provide supported YAML/JSON syntax. 
" + f"Details: {exc}" + ) if not isinstance(data, dict): die(f"profile at {path} must be a mapping/object") return data @@ -693,6 +1218,13 @@ def to_string_list(value: Any, default: list[str] | None = None) -> list[str]: return list(default or []) +def to_nonempty_string(value: Any, default: str) -> str: + if isinstance(value, str): + text = value.strip() + return text if text else default + return default + + def normalize_profile(raw: dict[str, Any]) -> dict[str, Any]: repo = raw.get("repo") or {} review = raw.get("review") or {} @@ -752,12 +1284,11 @@ def normalize_profile(raw: dict[str, Any]) -> dict[str, Any]: return { "version": str(raw.get("version", "1")), - "repo_name": str(repo.get("name", "Repository")).strip() or "Repository", - "default_base": str(review.get("default_base", "main")).strip() or "main", + "repo_name": to_nonempty_string(repo.get("name"), "Repository"), + "default_base": to_nonempty_string(review.get("default_base"), "main"), "strict_gate": to_bool(review.get("strict_gate"), True), "depth_hotspots": to_int(review.get("depth_hotspots"), 3), - "output_root": str(review.get("output_root", ".codex/review-runs")).strip() - or ".codex/review-runs", + "output_root": to_nonempty_string(review.get("output_root"), ".codex/review-runs"), "rule_patterns": to_string_list(rules.get("include"), ["AGENTS.md", ".cursor/rules/**/*.mdc"]), "default_domains": default_domains, "allowed_domains": allowed_domains, @@ -811,6 +1342,24 @@ def discover_rule_files(repo_root: Path, patterns: list[str]) -> list[str]: return sorted(matches) +def validate_rule_patterns(repo_root: Path, patterns: list[str]) -> tuple[list[str], list[str]]: + valid_patterns: list[str] = [] + warnings: list[str] = [] + for pattern in patterns: + normalized = str(pattern).strip() + if not normalized: + continue + matches = discover_rule_files(repo_root, [normalized]) + if matches: + valid_patterns.append(normalized) + continue + if any(ch in normalized for ch in "*?[]"): + 
warnings.append(f"rule pattern '{normalized}' matched no files") + else: + warnings.append(f"rule file '{normalized}' not found") + return valid_patterns, warnings + + def collect_changed_files(repo_root: Path, mode: str, base: str, commit: str) -> list[str]: if mode == "base": out = run_checked(["git", "diff", "--name-only", f"{base}...HEAD"], repo_root) @@ -1054,6 +1603,20 @@ def run_review(args: argparse.Namespace) -> int: profile = normalize_profile(synced_profile) + resolved_rule_patterns, rule_pattern_warnings = validate_rule_patterns( + repo_root, + profile["rule_patterns"], + ) + for warning in rule_pattern_warnings: + print(f"warning: {warning}", file=sys.stderr) + if profile["rule_patterns"] and not resolved_rule_patterns: + print( + "warning: no enforceable rule files were resolved from profile rule patterns; " + "continuing without rule-file enforcement.", + file=sys.stderr, + ) + profile["rule_patterns"] = resolved_rule_patterns + if args.print_effective_profile: print( json.dumps( diff --git a/test/codexw_fallback_yaml_test.py b/test/codexw_fallback_yaml_test.py new file mode 100644 index 0000000..5870a9f --- /dev/null +++ b/test/codexw_fallback_yaml_test.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python3 +"""Targeted tests for codexw fallback YAML parser/writer behavior.""" + +from __future__ import annotations + +import importlib.machinery +import importlib.util +import pathlib +import unittest + + +REPO_ROOT = pathlib.Path(__file__).resolve().parents[1] +CODEXW_PATH = REPO_ROOT / "codexw" + + +def load_codexw_module(): + loader = importlib.machinery.SourceFileLoader("codexw_module", str(CODEXW_PATH)) + spec = importlib.util.spec_from_loader(loader.name, loader) + if spec is None: + raise RuntimeError("failed to build import spec for codexw") + module = importlib.util.module_from_spec(spec) + loader.exec_module(module) + return module + + +class CodexwFallbackYamlTests(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.codexw = 
load_codexw_module() + + def test_flow_list_parses_as_list(self): + parsed = self.codexw._parse_simple_yaml( + """ +domains: + default: [core] + allowed: [core, testing] +""".strip() + ) + + self.assertEqual(parsed["domains"]["default"], ["core"]) + self.assertEqual(parsed["domains"]["allowed"], ["core", "testing"]) + + normalized = self.codexw.normalize_profile(parsed) + self.assertEqual(normalized["default_domains"], ["core"]) + self.assertEqual(normalized["allowed_domains"], ["core", "testing"]) + + def test_inline_comments_do_not_override_values(self): + parsed = self.codexw._parse_simple_yaml( + """ +review: + strict_gate: false # advisory mode + depth_hotspots: 1 # small changes only +domains: + default: [core] + allowed: [core] +""".strip() + ) + + self.assertFalse(parsed["review"]["strict_gate"]) + self.assertEqual(parsed["review"]["depth_hotspots"], 1) + + normalized = self.codexw.normalize_profile(parsed) + self.assertFalse(normalized["strict_gate"]) + self.assertEqual(normalized["depth_hotspots"], 1) + + def test_no_space_comment_after_closed_flow_and_quoted_scalars(self): + parsed = self.codexw._parse_simple_yaml( + """ +domains: + default: [core]# comment + allowed: [core, testing]# comment +repo: + name: 'Repo Name'# comment +prompts: + global: "line"# comment +meta: + link: https://example.com/#fragment +""".strip() + ) + + self.assertEqual(parsed["domains"]["default"], ["core"]) + self.assertEqual(parsed["domains"]["allowed"], ["core", "testing"]) + self.assertEqual(parsed["repo"]["name"], "Repo Name") + self.assertEqual(parsed["prompts"]["global"], "line") + self.assertEqual(parsed["meta"]["link"], "https://example.com/#fragment") + + normalized = self.codexw.normalize_profile(parsed) + self.assertEqual(normalized["default_domains"], ["core"]) + self.assertEqual(normalized["allowed_domains"], ["core", "testing"]) + self.assertEqual(normalized["repo_name"], "Repo Name") + + def test_list_item_with_colon_is_not_forced_to_inline_map(self): + parsed = 
self.codexw._parse_simple_yaml( + """ +values: + - https://example.com +""".strip() + ) + self.assertEqual(parsed["values"], ["https://example.com"]) + + def test_single_quote_escapes_in_flow_items(self): + parsed = self.codexw._parse_simple_yaml( + """ +values: + - ['it''s,ok', core] +""".strip() + ) + self.assertEqual(parsed["values"], [["it's,ok", "core"]]) + + def test_explicit_nulls_do_not_turn_into_empty_maps(self): + parsed = self.codexw._parse_simple_yaml( + """ +review: + default_base: + output_root: +domains: + default: [core] + allowed: [core] +pipeline: + core_passes: + - id: core-breadth + name: Core breadth + instructions: | + test +""".strip() + ) + + self.assertIsNone(parsed["review"]["default_base"]) + self.assertIsNone(parsed["review"]["output_root"]) + + normalized = self.codexw.normalize_profile(parsed) + self.assertEqual(normalized["default_base"], "main") + self.assertEqual(normalized["output_root"], ".codex/review-runs") + + def test_dump_yaml_round_trips_with_fallback_parser(self): + profile = { + "version": 1, + "repo": {"name": "Repo"}, + "review": {"default_base": "main", "strict_gate": True, "depth_hotspots": 2}, + "domains": {"default": ["core"], "allowed": ["core", "testing"]}, + "prompts": { + "global": "Line 1\nLine 2", + "by_domain": {"testing": "Focus on tests"}, + }, + "pipeline": { + "include_policy_pass": True, + "include_core_passes": True, + "include_domain_passes": True, + "include_depth_passes": True, + "core_passes": [ + { + "id": "core-breadth", + "name": "Core breadth", + "instructions": "Task:\n- cover all files", + } + ], + }, + } + + dumped = self.codexw._dump_yaml_text(profile) + parsed = self.codexw._parse_simple_yaml(dumped) + + self.assertEqual(parsed["domains"]["allowed"], ["core", "testing"]) + self.assertEqual(parsed["review"]["depth_hotspots"], 2) + self.assertEqual(parsed["prompts"]["global"], "Line 1\nLine 2") + + +if __name__ == "__main__": + unittest.main() From f6297b5de913127cfba3b8584bab49222b7521fa Mon 
Sep 17 00:00:00 2001 From: pavan Date: Sat, 21 Feb 2026 15:39:04 +0530 Subject: [PATCH 4/7] Restructure codexw into package layout with profile examples --- .gitignore | 2 + .pre-commit-hooks.yaml | 6 +- README.md | 26 ++--- codexw/__init__.py | 0 codexw => codexw/__main__.py | 83 +++++----------- codexw/codexw-features-and-usecases.md | 97 +++++++++++++++++++ ...view-profile.duolingo-android.example.yaml | 9 +- codexw/local-review-profile.example.yaml | 53 ++++++++++ test/codexw_fallback_yaml_test.py | 48 ++++++++- 9 files changed, 244 insertions(+), 80 deletions(-) create mode 100644 .gitignore create mode 100644 codexw/__init__.py rename codexw => codexw/__main__.py (96%) create mode 100644 codexw/codexw-features-and-usecases.md rename local-review-profile.example.yaml => codexw/local-review-profile.duolingo-android.example.yaml (75%) create mode 100644 codexw/local-review-profile.example.yaml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..944b817 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.idea/ +codexw-generic-bootstrap.yaml diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 3f7f3aa..3e60b47 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -28,8 +28,8 @@ - id: codex-review-pr-grade name: Codex AI Code Review (PR-grade) - description: Profile-aware multi-pass Codex review via ./codexw (auto-generates and auto-syncs local-review-profile.yaml). - entry: ./codexw review + description: Profile-aware multi-pass Codex review via ./codexw/__main__.py (auto-generates and auto-syncs local-review-profile.yaml). 
+ entry: ./codexw/__main__.py review language: script pass_filenames: false stages: [manual] @@ -63,7 +63,7 @@ - id: codex-review-pr-grade-dev name: Codex AI Code Review (PR-grade, dev) - entry: ./codexw review + entry: ./codexw/__main__.py review language: script pass_filenames: false stages: [manual] diff --git a/README.md b/README.md index 3932f4f..1fe0486 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ Profile-aware multi-pass local review using `codexw`. This hook is also `manual` It runs detailed PR-grade review from `local-review-profile.yaml`. `codexw` also includes compatibility fallback for Codex CLI versions that reject prompt+target combinations. -Canonical command is `codexw review`; `codexw review-pr` is kept as a compatibility alias. +Canonical command is `./codexw/__main__.py review`; `./codexw/__main__.py review-pr` is kept as a compatibility alias. If profile is missing, `codexw` auto-generates `local-review-profile.yaml` on first run. On each run, `codexw` auto-syncs profile entries derived from repository signals (rules/domains/domain prompts) while preserving manual overrides. Stale auto-managed entries are pruned when source-of-truth changes. 
@@ -103,19 +103,19 @@ pre-commit run codex-review-pr-grade --all-files Direct execution (without pre-commit): ```bash -./codexw review -./codexw review --base main -./codexw review --domains core,testing --no-fail-on-findings +./codexw/__main__.py review +./codexw/__main__.py review --base main +./codexw/__main__.py review --domains core,testing --no-fail-on-findings # Create missing profile and exit -./codexw review --bootstrap-only +./codexw/__main__.py review --bootstrap-only # Sync profile from repository signals and exit -./codexw review --sync-profile-only +./codexw/__main__.py review --sync-profile-only # Validate profile loading only (no Codex run) -./codexw review --print-effective-profile +./codexw/__main__.py review --print-effective-profile # Disable profile sync for one run -./codexw review --no-sync-profile +./codexw/__main__.py review --no-sync-profile # Keep stale auto-managed profile entries for this run -./codexw review --no-prune-autogen +./codexw/__main__.py review --no-prune-autogen ``` `local-review-profile.yaml` schema (minimum practical shape): @@ -164,8 +164,12 @@ pipeline: - Perform depth-first review of hotspot file: {hotspot} ``` -Reference profile: -`local-review-profile.example.yaml` +Reference profiles: +- `codexw/local-review-profile.example.yaml` (generic template) +- `codexw/local-review-profile.duolingo-android.example.yaml` (concrete Duolingo Android example) + +Feature/use-case guide: +- `codexw/codexw-features-and-usecases.md` Hook id for pre-commit: `codex-review-pr-grade` diff --git a/codexw/__init__.py b/codexw/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/codexw b/codexw/__main__.py similarity index 96% rename from codexw rename to codexw/__main__.py index 52518ea..e0804ea 100755 --- a/codexw +++ b/codexw/__main__.py @@ -18,39 +18,6 @@ NO_FINDINGS_SENTINEL = "No actionable findings." 
-DOMAIN_PROMPT_TEMPLATES: dict[str, str] = { - "experiments": ( - "Focus areas:\n" - "- experiment overtreatment risk and incorrect gating\n" - "- control/treatment behavior leaks and inverted conditions\n" - "- stale experiment branches and incomplete cleanup\n" - "- observeTreatmentRecord/getConditionAndTreat usage correctness\n" - "- missing test coverage for control/treatment behavior" - ), - "compose": ( - "Focus areas:\n" - "- required Compose conventions from rule files\n" - "- prohibited patterns (Text usage, theming violations, forbidden components)\n" - "- misuse of shared design-compose/common-compose primitives\n" - "- state/recomposition/lifecycle mistakes in composables" - ), - "coroutines": ( - "Focus areas:\n" - "- dispatcher injection and hardcoded dispatcher violations\n" - "- cancellation handling and structured concurrency issues\n" - "- viewModelScope/lifecycle flow collection correctness\n" - "- RxJava/Coroutines interop and migration pattern violations\n" - "- coroutine test pattern correctness (runTest/TestScope/dispatchers)" - ), - "testing": ( - "Focus areas:\n" - "- unit testing conventions and AAA structure violations\n" - "- required fake-vs-mock usage pattern violations\n" - "- FakeUsersRepository and User.emptyUser().copy migration correctness\n" - "- weak assertions and missing verification for side effects" - ), -} - DEFAULT_GLOBAL_PROMPT = ( "Use repository standards for lifecycle, state, architecture boundaries, and " "production-safety. 
Prioritize behavior-changing issues and policy violations " @@ -329,18 +296,10 @@ def parse_frontmatter(path: Path) -> dict[str, Any]: def _domain_hints_from_text(text: str) -> list[str]: - lowered = text.lower() - out: list[str] = [] - hints: list[tuple[str, tuple[str, ...]]] = [ - ("experiments", ("experiment", "treatment", "abtest", "feature-flag")), - ("compose", ("compose", "composable", "design-compose")), - ("coroutines", ("coroutine", "flow", "rxjava", "dispatcher")), - ("testing", ("testing", "test", "fake", "mock", "assert", "junit")), - ] - for domain, needles in hints: - if any(needle in lowered for needle in needles): - out.append(domain) - return out + # Keep inference repo-agnostic: domain ownership should come from explicit + # rule metadata or repository profile, not keyword guesses in script code. + _ = text + return [] def _to_boolish(value: Any) -> bool | None: @@ -367,17 +326,7 @@ def _extract_rule_domains(meta: dict[str, Any], rel_path: str) -> list[str]: for candidate in domain_candidates: for item in to_string_list(candidate, []): normalized = item.strip().lower().replace(" ", "-") - if normalized in {"experiment", "experiments"}: - domains.append("experiments") - elif normalized in {"compose", "ui-compose"}: - domains.append("compose") - elif normalized in {"coroutines", "coroutine", "async", "rxjava"}: - domains.append("coroutines") - elif normalized in {"testing", "tests", "test"}: - domains.append("testing") - elif normalized in {"core"}: - domains.append("core") - elif normalized: + if normalized: domains.append(normalized) if not domains: @@ -413,10 +362,11 @@ def infer_domains_from_rule_metadata(rule_metadata: list[dict[str, Any]]) -> lis for domain in to_string_list(row.get("domains"), []): domains.add(domain) - preferred = ["core", "experiments", "compose", "coroutines", "testing"] - result = [d for d in preferred if d in domains] + result: list[str] = [] + if "core" in domains: + result.append("core") for domain in sorted(domains): 
- if domain not in result: + if domain and domain not in result: result.append(domain) return result @@ -433,14 +383,25 @@ def default_pipeline_config() -> dict[str, Any]: } +def default_domain_prompt_template(domain: str) -> str: + return ( + f"Domain focus: {domain}\n" + "Focus areas:\n" + "- domain-specific correctness and policy compliance\n" + "- behavior/regression risks and boundary-condition failures\n" + "- state, contract, lifecycle, or concurrency issues relevant to this domain\n" + "- missing or weak tests for critical domain behavior" + ) + + def build_bootstrap_profile(repo_root: Path) -> dict[str, Any]: rule_patterns = infer_rule_patterns(repo_root) rule_metadata = discover_rule_metadata(repo_root, rule_patterns) domains = infer_domains_from_rule_metadata(rule_metadata) by_domain: dict[str, str] = { - d: DOMAIN_PROMPT_TEMPLATES[d] + d: default_domain_prompt_template(d) for d in domains - if d in DOMAIN_PROMPT_TEMPLATES and d != "core" + if d != "core" } return { diff --git a/codexw/codexw-features-and-usecases.md b/codexw/codexw-features-and-usecases.md new file mode 100644 index 0000000..baaf3eb --- /dev/null +++ b/codexw/codexw-features-and-usecases.md @@ -0,0 +1,97 @@ +# Codexw Features + Use Cases + +## 1) Quick local review hook (`codex-review`) +`codex-review` runs plain `codex review` from pre-commit manual stage. +It gives a fast, low-friction local review path. +Use this for quick sanity checks before push. +Why this matters: fast feedback without waiting for PR-grade orchestration. + +## 2) PR-grade local review hook (`codex-review-pr-grade`) +`codex-review-pr-grade` runs `./codexw/__main__.py review`. +It executes profile-driven, multi-pass review instead of one generic pass. +Use this before opening or updating a PR. +Why this matters: deeper, more consistent local review quality. + +## 3) Rule pattern validation at startup +`codexw` reads `rules.include` from `local-review-profile.yaml` (for example `AGENTS.md`, `*.mdc`). 
+At startup, it resolves each pattern and checks if real files exist. +If a pattern/file does not resolve, it prints a warning and removes that pattern from effective enforcement for that run. +Why this matters: avoids silent “rules are enforced” assumptions when paths are stale or misconfigured. + +## 4) Fallback YAML parser/writer (no `PyYAML` required) +`codexw` can read/write profile YAML even when `PyYAML` is not installed. +This keeps profile bootstrap, sync, and review runnable across varied machines and CI images. +Use this when environment dependencies are minimal or inconsistent. +Why this matters: review workflow stays operational without extra setup. + +## 5) Hardened fallback parsing semantics +Fallback parsing supports flow lists, inline comments, null values, quoted scalars, and escape handling. +It is designed to preserve effective config correctness in real-world YAML formatting. +Use this when profiles include compact YAML forms and comments. +Why this matters: prevents silent config drift that can change domains, gating, or scope. + +## 6) Target scope control (`--base`, `--uncommitted`, `--commit`) +`codexw` can review a branch diff, local dirty state, or a specific commit. +This limits analysis to the intended change window. +Use `--uncommitted` during iteration and `--base` for pre-merge validation. +Why this matters: less noise, more relevant findings. + +## 7) Profile bootstrap and sync (`--bootstrap-only`, `--sync-profile-only`) +If profile is missing, bootstrap creates it from repository signals. +Sync refreshes auto-managed parts (rules/domains/prompts) while preserving manual edits. +Use this during onboarding and rule evolution. +Why this matters: less manual maintenance and better policy consistency. + +## 8) Sync controls (`--no-sync-profile`, `--no-prune-autogen`) +These flags let teams freeze profile behavior for a run. +`--no-sync-profile` skips sync; `--no-prune-autogen` keeps stale auto-managed entries. 
+Use this for debugging or controlled rollout. +Why this matters: safer troubleshooting when behavior changes are under investigation. + +## 9) Domain-focused review (`--domains core,testing`) +`--domains` filters which domain passes run and which domain prompts are applied. +Backend execution is still `codex review`, but wrapper orchestration changes what is asked and how passes are executed. +Use this for targeted work (for example testing-heavy changes). +Why this matters: concentrates runtime budget on highest-value domains. + +## 10) Wrapper enhancement over plain `codex review` +`codexw` adds orchestration around `codex review`: pass planning, prompt composition, rule context injection, parsing, reporting, and gating. +So one backend engine becomes a structured local review pipeline. +Use this when chat-style one-pass review is not enough. +Why this matters: improves repeatability and depth. + +## 11) Multi-pass pipeline (policy/core/domain/depth) +`codexw` runs specialized passes instead of a single flat prompt. +Each pass targets different risk classes and coverage goals. +Use this on complex or high-impact diffs. +Why this matters: better recall and fewer blind spots. + +## 12) Hotspot depth analysis (`--depth-hotspots`) +Hotspots are inferred from changed-line churn and reviewed with extra depth passes. +This prioritizes files with higher defect likelihood. +Use this for large diffs with uneven risk distribution. +Why this matters: deeper scrutiny where it most likely pays off. + +## 13) Gating modes (`--fail-on-findings`, `--no-fail-on-findings`) +`codexw` can fail non-zero on findings or run in advisory mode. +This supports both strict gate and exploratory feedback workflows. +Use fail mode for merge readiness and advisory mode during early iteration. +Why this matters: one tool fits multiple workflow stages. 
+ +## 14) Effective profile inspection (`--print-effective-profile`) +This prints normalized runtime profile after loading/sync/validation and exits. +No review passes are executed. +Use this to verify domains, base branch, gating settings, and resolved rule patterns. +Why this matters: configuration behavior is inspectable before full execution. + +## 15) Structured review artifacts +Outputs include per-pass markdown, combined report, findings JSON, changed files/modules, hotspots, and enforced rule inventory. +These artifacts support debugging, review handoff, and automation. +Use this when teams need both human-readable and machine-readable outputs. +Why this matters: easier triage, auditing, and tooling integration. + +## 16) Compatibility retry for CLI prompt/target constraints +If a Codex CLI variant rejects prompt+target combinations, `codexw` retries with a compatible path. +This avoids hard failures due to client capability differences. +Use this across mixed developer environments. +Why this matters: more reliable local execution across CLI versions. diff --git a/local-review-profile.example.yaml b/codexw/local-review-profile.duolingo-android.example.yaml similarity index 75% rename from local-review-profile.example.yaml rename to codexw/local-review-profile.duolingo-android.example.yaml index 2c96427..2dce3c8 100644 --- a/local-review-profile.example.yaml +++ b/codexw/local-review-profile.duolingo-android.example.yaml @@ -31,15 +31,16 @@ domains: prompts: global: | Prioritize behavior-changing issues over style concerns. + Treat AGENTS.md and synced AI rules as enforcement sources. by_domain: experiments: | - Focus on experiment gating, control/treatment leakage, and cleanup correctness. + Focus on experiment gating correctness, control/treatment isolation, and cleanup safety. compose: | - Focus on design-system usage, state/recomposition behavior, and lifecycle correctness. 
+ Focus on design-system conformance, state/recomposition behavior, and lifecycle safety. coroutines: | - Focus on structured concurrency, cancellation, dispatcher correctness, and interop risks. + Focus on structured concurrency, cancellation behavior, and dispatcher/lifecycle correctness. testing: | - Focus on missing tests for high-risk behavior and weak assertions. + Focus on missing tests for high-risk paths and weak assertions around side effects. pipeline: include_policy_pass: true diff --git a/codexw/local-review-profile.example.yaml b/codexw/local-review-profile.example.yaml new file mode 100644 index 0000000..7d48b3d --- /dev/null +++ b/codexw/local-review-profile.example.yaml @@ -0,0 +1,53 @@ +version: 1 + +repo: + name: Repo Name + +review: + default_base: main + strict_gate: true + depth_hotspots: 3 + output_root: .codex/review-runs + +rules: + include: + - AGENTS.md + - .cursor/rules/**/*.mdc + +domains: + default: + - core + - testing + allowed: + - core + - testing + +prompts: + global: | + Add repository-wide review context here. + by_domain: + testing: | + Add testing-specific context for this repository. + +pipeline: + include_policy_pass: true + include_core_passes: true + include_domain_passes: true + include_depth_passes: true + core_passes: + - id: core-breadth + name: Core breadth coverage + instructions: | + Review every changed file in this shard and report actionable findings only. + - id: core-regressions + name: Core regressions + instructions: | + Focus on behavioral regressions, crash/nullability, and security/privacy issues. + - id: core-architecture + name: Core architecture + instructions: | + Focus on architecture boundaries, concurrency, and lifecycle correctness. + - id: core-tests + name: Core tests + instructions: | + Focus on missing tests and high-risk edge cases without coverage. 
diff --git a/test/codexw_fallback_yaml_test.py b/test/codexw_fallback_yaml_test.py index 5870a9f..26d1d25 100644 --- a/test/codexw_fallback_yaml_test.py +++ b/test/codexw_fallback_yaml_test.py @@ -6,11 +6,12 @@ import importlib.machinery import importlib.util import pathlib +import tempfile import unittest REPO_ROOT = pathlib.Path(__file__).resolve().parents[1] -CODEXW_PATH = REPO_ROOT / "codexw" +CODEXW_PATH = REPO_ROOT / "codexw" / "__main__.py" def load_codexw_module(): @@ -164,6 +165,51 @@ def test_dump_yaml_round_trips_with_fallback_parser(self): self.assertEqual(parsed["review"]["depth_hotspots"], 2) self.assertEqual(parsed["prompts"]["global"], "Line 1\nLine 2") + def test_default_domain_prompt_template_is_repo_agnostic(self): + prompt = self.codexw.default_domain_prompt_template("custom-domain") + self.assertIn("Domain focus: custom-domain", prompt) + self.assertIn("domain-specific correctness and policy compliance", prompt) + self.assertNotIn("FakeUsersRepository", prompt) + self.assertNotIn("Duolingo", prompt) + + def test_bootstrap_profile_uses_generic_domain_prompts(self): + with tempfile.TemporaryDirectory() as tmp: + repo_root = pathlib.Path(tmp) + rules_dir = repo_root / ".cursor" / "rules" + rules_dir.mkdir(parents=True, exist_ok=True) + (rules_dir / "testing-rule.mdc").write_text( + """--- +description: Testing conventions +domain: testing +--- +Use testing standards. 
+""", + encoding="utf-8", + ) + + profile = self.codexw.build_bootstrap_profile(repo_root) + self.assertIn("testing", profile["domains"]["allowed"]) + self.assertEqual( + profile["prompts"]["by_domain"]["testing"], + self.codexw.default_domain_prompt_template("testing"), + ) + + def test_extract_rule_domains_does_not_keyword_infer_from_description(self): + domains = self.codexw._extract_rule_domains( + {"description": "check experiment treatment and dispatcher usage"}, + "rules/misc-rule.mdc", + ) + self.assertEqual(domains, []) + + def test_infer_domains_from_rule_metadata_is_generic(self): + inferred = self.codexw.infer_domains_from_rule_metadata( + [ + {"domains": ["zeta"]}, + {"domains": ["alpha"]}, + ] + ) + self.assertEqual(inferred, ["core", "alpha", "zeta"]) + if __name__ == "__main__": unittest.main() From 1453253fe54140ca9210a97b7a6e5a6482dba673 Mon Sep 17 00:00:00 2001 From: pavan Date: Sat, 21 Feb 2026 15:55:14 +0530 Subject: [PATCH 5/7] Clarify sync-ai-rules vs Codex hook execution model --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 1fe0486..fc90aaa 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,10 @@ This hook synchronizes AI coding rules from `.cursor/rules/` and `.code_review/` This ensures all AI coding assistants stay aware of the same rules and coding conventions. +Execution model note: +- `sync-ai-rules` is deterministic file generation, so it uses `language: docker_image` and a `files:` filter to run automatically when rule files change. +- Codex hooks are AI-review workflows (`codex review` / `codexw`) that may take longer and require local auth, so they run in `manual` stage by default and are triggered on demand. + ## Codex AI Code Review Hook (`codex-review`) On-demand AI code review using the OpenAI Codex CLI. This hook runs in `manual` stage by default, meaning it won't block normal commits. 
From 0b1d764e89e3377a4f03b0417f49f6bbc122c28d Mon Sep 17 00:00:00 2001 From: pavan Date: Tue, 24 Feb 2026 00:02:34 +0530 Subject: [PATCH 6/7] Add codexw: profile-aware multi-pass Codex review wrapper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Split monolithic __main__.py into 13 focused modules - Model fallback chain (recency-biased, 5-model window) - Reasoning effort fallback (xhigh → high → medium → low) - State reuse across passes - Fallback YAML parser/writer (no PyYAML required) - 28 regression tests - architecture.md + features-and-usecases.md documentation --- .pre-commit-hooks.yaml | 8 +- Makefile | 4 +- README.md | 107 +- codexw/__init__.py | 31 + codexw/__main__.py | 2009 ++--------------- codexw/architecture.md | 258 +++ codexw/cli.py | 87 + codexw/codexw-features-and-usecases.md | 97 - codexw/constants.py | 128 ++ codexw/features-and-usecases.md | 89 + codexw/finding_parser.py | 119 + codexw/git.py | 147 ++ ...=> local-review-profile.repo.example.yaml} | 0 codexw/passes.py | 619 +++++ codexw/profile.py | 518 +++++ codexw/prompts.py | 89 + codexw/reporting.py | 179 ++ codexw/utils.py | 156 ++ codexw/yaml_fallback.py | 413 ++++ codexw/yaml_writer.py | 110 + test/codexw_fallback_yaml_test.py | 215 -- test/codexw_test.py | 763 +++++++ 22 files changed, 3983 insertions(+), 2163 deletions(-) create mode 100644 codexw/architecture.md create mode 100644 codexw/cli.py delete mode 100644 codexw/codexw-features-and-usecases.md create mode 100644 codexw/constants.py create mode 100644 codexw/features-and-usecases.md create mode 100644 codexw/finding_parser.py create mode 100644 codexw/git.py rename codexw/{local-review-profile.example.yaml => local-review-profile.repo.example.yaml} (100%) create mode 100644 codexw/passes.py create mode 100644 codexw/profile.py create mode 100644 codexw/prompts.py create mode 100644 codexw/reporting.py create mode 100644 codexw/utils.py create mode 100644 codexw/yaml_fallback.py 
create mode 100644 codexw/yaml_writer.py delete mode 100644 test/codexw_fallback_yaml_test.py create mode 100644 test/codexw_test.py diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 3e60b47..d938faf 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -35,11 +35,9 @@ stages: [manual] verbose: true -# Nobody should ever use these hooks in production. They're just for testing PRs in -# the duolingo/pre-commit-hooks repo more easily without having to tag and push -# temporary images to Docker Hub. Usage: edit a consumer repo's hook config to -# instead declare `id: duolingo-dev` or `id: sync-ai-rules-dev` and `rev: `, -# then run `pre-commit run --all-files` +# Dev hooks for testing PRs in the duolingo/pre-commit-hooks repo. +# Usage: edit a consumer repo's hook config to declare `id: duolingo-dev` +# and `rev: `, then run `pre-commit run --all-files` - id: duolingo-dev name: Duolingo (dev) entry: /entry diff --git a/Makefile b/Makefile index b0c9f03..ea72ed3 100644 --- a/Makefile +++ b/Makefile @@ -48,8 +48,8 @@ shell: # Runs tests .PHONY: test test: - echo "Running codexw fallback parser tests..." - python3 test/codexw_fallback_yaml_test.py + echo "Running codexw tests..." + python3 test/codexw_test.py docker run --rm -v "$${PWD}/test:/test" "$$(docker build --network=host -q .)" sh -c \ 'cd /tmp \ && cp -r /test/before actual \ diff --git a/README.md b/README.md index fc90aaa..956350a 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,7 @@ This hook synchronizes AI coding rules from `.cursor/rules/` and `.code_review/` This ensures all AI coding assistants stay aware of the same rules and coding conventions. Execution model note: + - `sync-ai-rules` is deterministic file generation, so it uses `language: docker_image` and a `files:` filter to run automatically when rule files change. 
- Codex hooks are AI-review workflows (`codex review` / `codexw`) that may take longer and require local auth, so they run in `manual` stage by default and are triggered on demand. @@ -58,10 +59,12 @@ Execution model note: On-demand AI code review using the OpenAI Codex CLI. This hook runs in `manual` stage by default, meaning it won't block normal commits. **Prerequisites:** + - Install Codex CLI: `brew install codex` or `npm install -g @openai/codex` - Authenticate: `codex auth login` (uses Duolingo ChatGPT org credentials) **Usage:** + ```bash # Run Codex review on staged changes pre-commit run codex-review @@ -71,6 +74,7 @@ pre-commit run codex-review --all-files ``` For direct CLI usage without pre-commit: + ```bash codex review --uncommitted codex review --base master @@ -82,21 +86,25 @@ Profile-aware multi-pass local review using `codexw`. This hook is also `manual` It runs detailed PR-grade review from `local-review-profile.yaml`. `codexw` also includes compatibility fallback for Codex CLI versions that reject prompt+target combinations. +`codexw` also includes recency-biased model fallback (latest 5 candidates, for example `gpt-5.3-codex` → `gpt-5.2-codex` → `gpt-5.1-codex` → `gpt-5-codex` → `gpt-4.2-codex`) and reasoning-effort fallback (`xhigh` → `high` → `medium` → `low`). Canonical command is `./codexw/__main__.py review`; `./codexw/__main__.py review-pr` is kept as a compatibility alias. If profile is missing, `codexw` auto-generates `local-review-profile.yaml` on first run. On each run, `codexw` auto-syncs profile entries derived from repository signals (rules/domains/domain prompts) while preserving manual overrides. Stale auto-managed entries are pruned when source-of-truth changes. 
PR-grade outputs include: + - pass-level markdown reports - combined markdown report (`combined-report.md`) - machine-readable findings (`findings.json`) **Prerequisites:** + - Install Codex CLI: `brew install codex` or `npm install -g @openai/codex` - Authenticate: `codex auth login` - Optional: pre-seed `local-review-profile.yaml` in target repo root (see example below) **Usage:** + ```bash # Run PR-grade review for current diff vs profile default base branch pre-commit run codex-review-pr-grade @@ -106,6 +114,7 @@ pre-commit run codex-review-pr-grade --all-files ``` Direct execution (without pre-commit): + ```bash ./codexw/__main__.py review ./codexw/__main__.py review --base main @@ -122,7 +131,98 @@ Direct execution (without pre-commit): ./codexw/__main__.py review --no-prune-autogen ``` +`review-pr` is an alias for `review` (kept for backward compatibility): + +```bash +./codexw/__main__.py review-pr --base master +``` + +### codexw CLI Flags Reference + +| Flag | Purpose | Default / Notes | +| --- | --- | --- | +| `--profile ` | Profile file to load/write. | Defaults to `local-review-profile.yaml` at repo root. | +| `--base ` | Review `branch...HEAD` diff. | Mutually exclusive with `--uncommitted` and `--commit`. Defaults to profile `review.default_base` when no target flag is passed. | +| `--uncommitted` | Review working tree changes. | Mutually exclusive target mode. Includes tracked and untracked files. | +| `--commit ` | Review a specific commit. | Mutually exclusive target mode. | +| `--domains ` | Restrict domain passes to selected domains. | Must be subset of profile `domains.allowed`. Defaults to profile `domains.default`. | +| `--depth-hotspots ` | Override hotspot depth pass count for this run. | Overrides profile `review.depth_hotspots`. | +| `--title ` | Pass custom title to `codex review`. | Optional metadata for review runs. | +| `--output-dir ` | Write artifacts to explicit output directory. | Defaults to `/`. 
| +| `--model ` | Requested model override for this run. | Used as preferred model; fallback chain may apply if unavailable. | +| `--print-effective-profile` | Print normalized effective profile and exit. | No review passes executed. | +| `--bootstrap-only` | Create missing profile (if needed) and exit. | No review passes executed. | +| `--sync-profile-only` | Sync profile from repo signals and exit. | No review passes executed. Cannot be combined with `--no-sync-profile`. | +| `--no-bootstrap-profile` | Disable automatic profile generation when missing. | Fails if profile file is absent. | +| `--no-sync-profile` | Disable sync from repository signals for this run. | Uses profile file as-is. | +| `--no-prune-autogen` | Keep stale auto-managed entries during sync for this run. | Sync still runs unless `--no-sync-profile` is set. | +| `--fail-on-findings` | Force strict gate (exit 2 when findings exist). | Mutually exclusive with `--no-fail-on-findings`. | +| `--no-fail-on-findings` | Advisory mode (do not fail on findings). | Mutually exclusive with `--fail-on-findings`. | + +### Common Flag Combos + +```bash +# Validate profile and inspect resolved settings (no Codex call) +./codexw/__main__.py review --profile local-review-profile.yaml --print-effective-profile + +# Advisory targeted review for local iteration +./codexw/__main__.py review --uncommitted --domains core,testing --no-fail-on-findings + +# Strict PR-grade run with explicit artifacts path +./codexw/__main__.py review --base master --fail-on-findings --output-dir .codex/review-runs/manual +``` + +### Sample Output for codexw-only Flags + +`--bootstrap-only` (missing profile): + +```text +$ ./codexw/__main__.py review --bootstrap-only +Generated local-review-profile.yaml from repository signals. Review and commit it. +Synchronized local-review-profile.yaml from repository signals. 
+warning: rule file 'AGENTS.md' not found +warning: rule pattern '.cursor/rules/**/*.mdc' matched no files +Profile ready: /local-review-profile.yaml +``` + +`--sync-profile-only`: + +```text +$ ./codexw/__main__.py review --sync-profile-only +warning: rule file 'AGENTS.md' not found +warning: rule pattern '.cursor/rules/**/*.mdc' matched no files +Profile ready: /local-review-profile.yaml +``` + +`--print-effective-profile`: + +```text +$ ./codexw/__main__.py review --print-effective-profile +warning: rule file 'AGENTS.md' not found +warning: rule pattern '.cursor/rules/**/*.mdc' matched no files +{ + "profile_path": "/local-review-profile.yaml", + "repo_root": "", + "effective_profile": { ...normalized profile JSON... } +} +``` + +`--no-bootstrap-profile` (profile missing): + +```text +$ ./codexw/__main__.py review --no-bootstrap-profile --print-effective-profile +error: profile not found: /local-review-profile.yaml. Add local-review-profile.yaml or pass --profile. +``` + +`--sync-profile-only` with `--no-sync-profile` (invalid combination): + +```text +$ ./codexw/__main__.py review --sync-profile-only --no-sync-profile +error: --sync-profile-only cannot be combined with --no-sync-profile +``` + `local-review-profile.yaml` schema (minimum practical shape): + ```yaml version: 1 @@ -169,11 +269,14 @@ pipeline: ``` Reference profiles: -- `codexw/local-review-profile.example.yaml` (generic template) + +- `codexw/local-review-profile.repo.example.yaml` (generic template) - `codexw/local-review-profile.duolingo-android.example.yaml` (concrete Duolingo Android example) Feature/use-case guide: -- `codexw/codexw-features-and-usecases.md` + +- `codexw/features-and-usecases.md` +- `codexw/architecture.md` (internal architecture) Hook id for pre-commit: `codex-review-pr-grade` diff --git a/codexw/__init__.py b/codexw/__init__.py index e69de29..4e61054 100644 --- a/codexw/__init__.py +++ b/codexw/__init__.py @@ -0,0 +1,31 @@ +"""Codexw: Profile-aware Codex PR-grade review 
wrapper. + +Public API: + - CodexwError: Base exception type + - PassSpec: Data class for review pass specification + - build_bootstrap_profile: Build initial profile from repo signals + - default_domain_prompt_template: Generic per-domain prompt template + - normalize_profile: Normalize raw profile dict + - load_profile: Load profile from file + - write_profile: Write profile to file +""" + +from .passes import PassSpec +from .profile import ( + build_bootstrap_profile, + default_domain_prompt_template, + load_profile, + normalize_profile, + write_profile, +) +from .utils import CodexwError + +__all__ = [ + "CodexwError", + "PassSpec", + "build_bootstrap_profile", + "default_domain_prompt_template", + "load_profile", + "normalize_profile", + "write_profile", +] diff --git a/codexw/__main__.py b/codexw/__main__.py index e0804ea..866836e 100755 --- a/codexw/__main__.py +++ b/codexw/__main__.py @@ -1,1524 +1,101 @@ #!/usr/bin/env python3 -"""Generic Codex PR-grade review wrapper (profile-aware, essentials-only).""" +"""Codexw: Profile-aware Codex PR-grade review wrapper. + +This is the main entry point for codexw. It orchestrates: +1. Profile loading and synchronization +2. Git change detection +3. Pass execution via Codex CLI +4. Report generation + +Usage: + python -m codexw review --base master + python -m codexw review --uncommitted + ./codexw/__main__.py review +""" from __future__ import annotations -import argparse import datetime as dt -import glob import json import os -import re -import shlex -import subprocess import sys from pathlib import Path -from typing import Any - - -NO_FINDINGS_SENTINEL = "No actionable findings." - -DEFAULT_GLOBAL_PROMPT = ( - "Use repository standards for lifecycle, state, architecture boundaries, and " - "production-safety. Prioritize behavior-changing issues and policy violations " - "over style-only comments." 
-) - -DEFAULT_POLICY_PASS_INSTRUCTIONS = ( - "Task:\n" - "- Enforce every standard file listed above.\n" - "- Output a 'Rule Coverage' section with one line per rule file:\n" - " :: Covered | NotApplicable :: short reason\n" - "- Then output actionable findings using the required schema.\n" - f"- If no actionable findings exist, include exactly this line: {NO_FINDINGS_SENTINEL}" -) - -DEFAULT_CORE_PASS_SPECS: list[dict[str, str]] = [ - { - "id": "core-breadth", - "name": "Core 1: breadth coverage across all changed files", - "instructions": ( - "Task:\n" - "- Perform full-breadth review across every changed file listed above.\n" - "- Output a 'Breadth Coverage' section with one line per changed file:\n" - " :: Reviewed | NotApplicable :: short reason\n" - "- Then output actionable findings using the required schema.\n" - f"- If no actionable findings exist, include exactly this line: {NO_FINDINGS_SENTINEL}" - ), - }, - { - "id": "core-regressions", - "name": "Core 2: regressions/security/crash scan", - "instructions": ( - "Focus areas:\n" - "- behavioral regressions\n" - "- crash/nullability risks\n" - "- state corruption and data-loss risks\n" - "- security and privacy issues" - ), - }, - { - "id": "core-architecture", - "name": "Core 3: architecture/concurrency scan", - "instructions": ( - "Focus areas:\n" - "- architecture boundaries and dependency misuse\n" - "- lifecycle and concurrency/threading issues\n" - "- error-handling/fallback correctness\n" - "- protocol/contract boundary failures" - ), - }, - { - "id": "core-tests", - "name": "Core 4: test-coverage scan", - "instructions": ( - "Focus areas:\n" - "- missing tests required to protect the change\n" - "- high-risk edge cases without coverage\n" - "- regressions likely to escape without tests" - ), - }, -] - -DEFAULT_DEPTH_PASS_INSTRUCTIONS = ( - "Task:\n" - "- Perform depth-first review of hotspot file: {hotspot}\n" - "- Traverse directly related changed call paths\n" - "- Prioritize subtle behavioral, 
concurrency, state, and boundary-condition failures\n" - "- Output only actionable findings with required schema\n" - f"- If no actionable findings exist, include exactly this line: {NO_FINDINGS_SENTINEL}" -) - - -def die(message: str, code: int = 1) -> None: - print(f"error: {message}", file=sys.stderr) - raise SystemExit(code) - -def run_checked(cmd: list[str], cwd: Path) -> str: - try: - proc = subprocess.run( - cmd, - cwd=str(cwd), - text=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - check=True, - ) - except subprocess.CalledProcessError as exc: - stderr = exc.stderr.strip() - stdout = exc.stdout.strip() - details = stderr or stdout or "command failed" - die(f"{' '.join(shlex.quote(x) for x in cmd)} :: {details}") - return proc.stdout - - -def run_captured(cmd: list[str], cwd: Path, out_file: Path, *, stream_output: bool) -> int: - proc = subprocess.run( - cmd, - cwd=str(cwd), - text=True, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, +if __package__ in {None, ""}: + # Support direct script execution: + # ./codexw/__main__.py review + repo_root_for_imports = Path(__file__).resolve().parents[1] + repo_root_str = str(repo_root_for_imports) + if repo_root_str not in sys.path: + sys.path.insert(0, repo_root_str) + + from codexw.cli import build_parser + from codexw.git import ( + changed_modules, + collect_changed_files, + collect_numstat, + find_repo_root, ) - output = proc.stdout or "" - out_file.write_text(output, encoding="utf-8") - if stream_output and output: - print(output, end="") - return proc.returncode - - -def run_review_pass_with_compat( - repo_root: Path, - out_file: Path, - target_args: list[str], - target_desc: str, - prompt: str, - pass_name: str, -) -> None: - primary_cmd = ["codex", "review", *target_args, prompt] - exit_code = run_captured(primary_cmd, repo_root, out_file, stream_output=True) - if exit_code == 0: - return - - content = out_file.read_text(encoding="utf-8", errors="replace") - prompt_target_incompat = "cannot 
be used with '[PROMPT]'" in content - if prompt_target_incompat and target_args: - print( - "warning: codex CLI rejected prompt+target flags; " - f"retrying pass '{pass_name}' in prompt-only compatibility mode.", - file=sys.stderr, - ) - compat_prefix = ( - "Target selection requested for this pass:\n" - f"- {target_desc}\n" - "Apply review findings to the requested target using the repository context below." - ) - compat_cmd = ["codex", "review", f"{compat_prefix}\n\n{prompt}"] - exit_code = run_captured(compat_cmd, repo_root, out_file, stream_output=True) - if exit_code == 0: - return - - die(f"codex review failed in pass '{pass_name}' with exit code {exit_code}") - - -def find_repo_root(start: Path) -> Path: - try: - out = run_checked(["git", "rev-parse", "--show-toplevel"], start).strip() - if out: - return Path(out) - except SystemExit: - pass - return start - - -def git_ref_exists(repo_root: Path, ref: str) -> bool: - proc = subprocess.run( - ["git", "show-ref", "--verify", "--quiet", ref], - cwd=str(repo_root), - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - text=True, + from codexw.passes import PassBuilder, PassRunner + from codexw.profile import ( + build_bootstrap_profile, + discover_rule_files, + load_profile, + normalize_profile, + sync_profile_with_repo, + validate_rule_patterns, + write_profile, ) - return proc.returncode == 0 - - -def detect_default_base(repo_root: Path) -> str: - for candidate in ("master", "main"): - if git_ref_exists(repo_root, f"refs/heads/{candidate}"): - return candidate - for candidate in ("master", "main"): - if git_ref_exists(repo_root, f"refs/remotes/origin/{candidate}"): - return candidate - return "main" - - -def infer_repo_name(repo_root: Path) -> str: - raw = repo_root.name.strip() - if not raw: - return "Repository" - - tokens = [t for t in re.split(r"[-_]+", raw) if t] - if not tokens: - return raw - - def normalize_token(token: str) -> str: - lowered = token.lower() - special = { - "ios": "iOS", - 
"android": "Android", - "api": "API", - "sdk": "SDK", - "ml": "ML", - "ai": "AI", - "ui": "UI", - } - return special.get(lowered, token.capitalize()) - - return " ".join(normalize_token(t) for t in tokens) - - -def infer_rule_patterns(repo_root: Path) -> list[str]: - patterns: list[str] = [] - if (repo_root / "AGENTS.md").is_file(): - patterns.append("AGENTS.md") - if (repo_root / ".cursor/rules").is_dir(): - patterns.append(".cursor/rules/**/*.mdc") - if (repo_root / ".code_review").is_dir(): - patterns.append(".code_review/**/*.md") - if not patterns: - patterns = ["AGENTS.md", ".cursor/rules/**/*.mdc"] - return patterns - - -def parse_yaml_mapping_fragment(raw: str) -> dict[str, Any]: - text = raw.strip() - if not text: - return {} - - try: - import yaml # type: ignore - - data = yaml.safe_load(text) - return data if isinstance(data, dict) else {} - except ModuleNotFoundError: - pass - except Exception: - return {} - - parsed: dict[str, Any] = {} - current_key: str | None = None - for raw_line in text.splitlines(): - line = raw_line.strip() - if not line or line.startswith("#"): - continue - if line.startswith("-") and current_key: - current = parsed.get(current_key) - if not isinstance(current, list): - current = [] - current.append(line[1:].strip()) - parsed[current_key] = current - continue - if ":" not in line: - continue - key, value = line.split(":", 1) - k = key.strip() - v = value.strip() - current_key = k - if not v: - parsed[k] = [] - continue - if v.startswith("[") and v.endswith("]"): - inner = v[1:-1].strip() - parsed[k] = [item.strip().strip("'\"") for item in inner.split(",") if item.strip()] - continue - lowered = v.lower() - if lowered in {"true", "false"}: - parsed[k] = lowered == "true" - else: - parsed[k] = v.strip("'\"") - return parsed - - -def parse_frontmatter(path: Path) -> dict[str, Any]: - try: - text = path.read_text(encoding="utf-8", errors="replace") - except OSError: - return {} - - if not text.startswith("---"): - return {} - - 
match = re.match(r"^---\s*\n(.*?)\n---\s*(?:\n|$)", text, flags=re.DOTALL) - if not match: - return {} - return parse_yaml_mapping_fragment(match.group(1)) - - -def _domain_hints_from_text(text: str) -> list[str]: - # Keep inference repo-agnostic: domain ownership should come from explicit - # rule metadata or repository profile, not keyword guesses in script code. - _ = text - return [] - - -def _to_boolish(value: Any) -> bool | None: - if isinstance(value, bool): - return value - if isinstance(value, str): - lowered = value.strip().lower() - if lowered in {"true", "1", "yes", "on"}: - return True - if lowered in {"false", "0", "no", "off"}: - return False - return None - - -def _extract_rule_domains(meta: dict[str, Any], rel_path: str) -> list[str]: - domains: list[str] = [] - domain_candidates = [ - meta.get("domain"), - meta.get("domains"), - meta.get("tags"), - meta.get("category"), - meta.get("categories"), - ] - for candidate in domain_candidates: - for item in to_string_list(candidate, []): - normalized = item.strip().lower().replace(" ", "-") - if normalized: - domains.append(normalized) - - if not domains: - description = str(meta.get("description", "")).strip() - domains.extend(_domain_hints_from_text(f"{rel_path} {description}")) - return _unique(domains) - - -def discover_rule_metadata(repo_root: Path, patterns: list[str]) -> list[dict[str, Any]]: - files = discover_rule_files(repo_root, patterns) - rows: list[dict[str, Any]] = [] - for rel in files: - abs_path = repo_root / rel - meta = parse_frontmatter(abs_path) - always_apply = _to_boolish(meta.get("always_apply")) - if always_apply is None: - always_apply = _to_boolish(meta.get("alwaysApply")) - description = str(meta.get("description", "")).strip() - rows.append( - { - "path": rel, - "always_apply": bool(always_apply) if always_apply is not None else False, - "domains": _extract_rule_domains(meta, rel), - "description": description, - } - ) - return rows - - -def 
infer_domains_from_rule_metadata(rule_metadata: list[dict[str, Any]]) -> list[str]: - domains = {"core"} - for row in rule_metadata: - for domain in to_string_list(row.get("domains"), []): - domains.add(domain) - - result: list[str] = [] - if "core" in domains: - result.append("core") - for domain in sorted(domains): - if domain and domain not in result: - result.append(domain) - return result - - -def default_pipeline_config() -> dict[str, Any]: - return { - "include_policy_pass": True, - "include_core_passes": True, - "include_domain_passes": True, - "include_depth_passes": True, - "policy_instructions": DEFAULT_POLICY_PASS_INSTRUCTIONS, - "core_passes": json.loads(json.dumps(DEFAULT_CORE_PASS_SPECS)), - "depth_instructions": DEFAULT_DEPTH_PASS_INSTRUCTIONS, - } - - -def default_domain_prompt_template(domain: str) -> str: - return ( - f"Domain focus: {domain}\n" - "Focus areas:\n" - "- domain-specific correctness and policy compliance\n" - "- behavior/regression risks and boundary-condition failures\n" - "- state, contract, lifecycle, or concurrency issues relevant to this domain\n" - "- missing or weak tests for critical domain behavior" + from codexw.reporting import ( + write_combined_report, + write_empty_report, + write_findings_json, + write_support_files, ) - - -def build_bootstrap_profile(repo_root: Path) -> dict[str, Any]: - rule_patterns = infer_rule_patterns(repo_root) - rule_metadata = discover_rule_metadata(repo_root, rule_patterns) - domains = infer_domains_from_rule_metadata(rule_metadata) - by_domain: dict[str, str] = { - d: default_domain_prompt_template(d) - for d in domains - if d != "core" - } - - return { - "version": 1, - "repo": {"name": infer_repo_name(repo_root)}, - "review": { - "default_base": detect_default_base(repo_root), - "strict_gate": True, - "depth_hotspots": 3, - "output_root": ".codex/review-runs", - }, - "rules": {"include": rule_patterns}, - "domains": {"default": domains, "allowed": domains}, - "prompts": { - "global": 
DEFAULT_GLOBAL_PROMPT, - "by_domain": by_domain, - }, - "pipeline": default_pipeline_config(), - } - - -def _yaml_plain_scalar_allowed(value: str) -> bool: - if not value or value.strip() != value: - return False - if any(ch in value for ch in ":#{}[]&,*!?|>'\"%@`"): - return False - if value[0] in "-?:!&*@`": - return False - if "\n" in value or "\r" in value or "\t" in value: - return False - lowered = value.lower() - if lowered in {"true", "false", "null", "~", "yes", "no", "on", "off"}: - return False - if re.fullmatch(r"[+-]?\d+(?:\.\d+)?", value): - return False - return True - - -def _yaml_inline_scalar(value: Any) -> str: - if value is None: - return "null" - if isinstance(value, bool): - return "true" if value else "false" - if isinstance(value, (int, float)): - return str(value) - text = str(value) - if _yaml_plain_scalar_allowed(text): - return text - return json.dumps(text) - - -def _yaml_emit(value: Any, indent: int = 0) -> list[str]: - pad = " " * indent - - if isinstance(value, dict): - if not value: - return [pad + "{}"] - lines: list[str] = [] - for key, raw_val in value.items(): - key_text = str(key) - if isinstance(raw_val, str) and "\n" in raw_val: - lines.append(f"{pad}{key_text}: |") - for line in raw_val.splitlines(): - lines.append(" " * (indent + 2) + line) - continue - if isinstance(raw_val, dict): - if raw_val: - lines.append(f"{pad}{key_text}:") - lines.extend(_yaml_emit(raw_val, indent + 2)) - else: - lines.append(f"{pad}{key_text}: {{}}") - continue - if isinstance(raw_val, list): - if raw_val: - lines.append(f"{pad}{key_text}:") - lines.extend(_yaml_emit(raw_val, indent + 2)) - else: - lines.append(f"{pad}{key_text}: []") - continue - lines.append(f"{pad}{key_text}: {_yaml_inline_scalar(raw_val)}") - return lines - - if isinstance(value, list): - if not value: - return [pad + "[]"] - lines: list[str] = [] - for item in value: - if isinstance(item, str) and "\n" in item: - lines.append(f"{pad}- |") - for line in item.splitlines(): - 
lines.append(" " * (indent + 2) + line) - continue - if isinstance(item, dict): - if not item: - lines.append(f"{pad}- {{}}") - else: - lines.append(f"{pad}-") - lines.extend(_yaml_emit(item, indent + 2)) - continue - if isinstance(item, list): - if not item: - lines.append(f"{pad}- []") - else: - lines.append(f"{pad}-") - lines.extend(_yaml_emit(item, indent + 2)) - continue - lines.append(f"{pad}- {_yaml_inline_scalar(item)}") - return lines - - return [pad + _yaml_inline_scalar(value)] - - -def _dump_yaml_text(value: Any) -> str: - return "\n".join(_yaml_emit(value)).rstrip() + "\n" - - -def write_profile(path: Path, profile: dict[str, Any]) -> None: - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(_dump_yaml_text(profile), encoding="utf-8") - - -def _is_closed_quoted_scalar(text: str) -> bool: - stripped = text.strip() - if len(stripped) < 2: - return False - - if stripped[0] == "'" and stripped[-1] == "'": - idx = 1 - while idx < len(stripped) - 1: - if stripped[idx] == "'": - if idx + 1 < len(stripped) and stripped[idx + 1] == "'": - idx += 2 - continue - return False - idx += 1 - return True - - if stripped[0] == '"' and stripped[-1] == '"': - escaped = False - for ch in stripped[1:-1]: - if escaped: - escaped = False - elif ch == "\\": - escaped = True - elif ch == '"': - return False - return not escaped - - return False - - -def _is_closed_flow_collection(text: str) -> bool: - stripped = text.strip() - if len(stripped) < 2: - return False - if stripped[0] not in "[{": - return False - expected_end = "]" if stripped[0] == "[" else "}" - if stripped[-1] != expected_end: - return False - - depth = 0 - in_single = False - in_double = False - escaped = False - idx = 0 - while idx < len(stripped): - ch = stripped[idx] - if in_double: - if escaped: - escaped = False - elif ch == "\\": - escaped = True - elif ch == '"': - in_double = False - idx += 1 - continue - - if in_single: - if ch == "'": - if idx + 1 < len(stripped) and stripped[idx + 1] 
== "'": - idx += 2 - continue - in_single = False - idx += 1 - continue - - if ch == '"': - in_double = True - idx += 1 - continue - if ch == "'": - in_single = True - idx += 1 - continue - - if ch in "[{": - depth += 1 - elif ch in "]}": - depth -= 1 - if depth < 0: - return False - idx += 1 - - return depth == 0 and not in_single and not in_double and not escaped - - -def _strip_yaml_inline_comment(raw: str) -> str: - text = raw.rstrip() - in_single = False - in_double = False - escaped = False - idx = 0 - while idx < len(text): - ch = text[idx] - if in_double: - if escaped: - escaped = False - elif ch == "\\": - escaped = True - elif ch == '"': - in_double = False - idx += 1 - continue - - if in_single: - if ch == "'": - # YAML single-quote escape: doubled apostrophe. - if idx + 1 < len(text) and text[idx + 1] == "'": - idx += 2 - continue - in_single = False - idx += 1 - continue - - if ch == '"': - in_double = True - elif ch == "'": - in_single = True - elif ch == "#": - prefix = text[:idx].rstrip() - if ( - idx == 0 - or text[idx - 1].isspace() - or _is_closed_quoted_scalar(prefix) - or _is_closed_flow_collection(prefix) - ): - return text[:idx].rstrip() - idx += 1 - return text - - -def _split_yaml_flow_items(raw: str) -> list[str]: - items: list[str] = [] - buf: list[str] = [] - in_single = False - in_double = False - escaped = False - depth = 0 - - idx = 0 - while idx < len(raw): - ch = raw[idx] - if in_double: - buf.append(ch) - if escaped: - escaped = False - elif ch == "\\": - escaped = True - elif ch == '"': - in_double = False - idx += 1 - continue - - if in_single: - buf.append(ch) - if ch == "'": - if idx + 1 < len(raw) and raw[idx + 1] == "'": - buf.append(raw[idx + 1]) - idx += 2 - continue - in_single = False - idx += 1 - continue - - if ch == '"': - in_double = True - buf.append(ch) - idx += 1 - continue - if ch == "'": - in_single = True - buf.append(ch) - idx += 1 - continue - - if ch in "[{(": - depth += 1 - buf.append(ch) - idx += 1 - 
continue - if ch in "]})": - if depth > 0: - depth -= 1 - buf.append(ch) - idx += 1 - continue - - if ch == "," and depth == 0: - items.append("".join(buf).strip()) - buf = [] - idx += 1 - continue - - buf.append(ch) - idx += 1 - - tail = "".join(buf).strip() - if tail or raw.strip(): - items.append(tail) - return [item for item in items if item != ""] - - -def _parse_simple_yaml_scalar(raw: str) -> Any: - token = _strip_yaml_inline_comment(raw).strip() - if token == "": - return "" - lowered = token.lower() - if lowered == "true": - return True - if lowered == "false": - return False - if lowered in {"null", "~"}: - return None - if token == "{}": - return {} - if token == "[]": - return [] - if token.startswith("[") and token.endswith("]"): - inner = token[1:-1].strip() - if not inner: - return [] - return [_parse_simple_yaml_scalar(item) for item in _split_yaml_flow_items(inner)] - if token.startswith("{") and token.endswith("}"): - inner = token[1:-1].strip() - if not inner: - return {} - out: dict[str, Any] = {} - for item in _split_yaml_flow_items(inner): - if ":" not in item: - return token - key_raw, value_raw = item.split(":", 1) - key = _parse_simple_yaml_scalar(key_raw) - out[str(key)] = _parse_simple_yaml_scalar(value_raw) - return out - if re.fullmatch(r"[+-]?\d+", token): - try: - return int(token) - except ValueError: - return token - if re.fullmatch(r"[+-]?\d+\.\d+", token): - try: - return float(token) - except ValueError: - return token - if token.startswith('"') and token.endswith('"'): - try: - return json.loads(token) - except json.JSONDecodeError: - return token[1:-1] - if token.startswith("'") and token.endswith("'") and len(token) >= 2: - return token[1:-1].replace("''", "'") - return token - - -class _SimpleYamlParser: - def __init__(self, text: str) -> None: - self.lines = text.splitlines() - self.index = 0 - - @staticmethod - def _indent(line: str) -> int: - return len(line) - len(line.lstrip(" ")) - - @staticmethod - def 
_is_ignorable(line: str) -> bool: - stripped = line.strip() - return not stripped or stripped.startswith("#") or stripped in {"---", "..."} - - def _skip_ignorable(self) -> None: - while self.index < len(self.lines) and self._is_ignorable(self.lines[self.index]): - self.index += 1 - - def parse(self) -> Any: - self._skip_ignorable() - if self.index >= len(self.lines): - return {} - start_indent = self._indent(self.lines[self.index]) - value = self._parse_block(start_indent) - self._skip_ignorable() - if self.index < len(self.lines): - raise ValueError(f"unexpected trailing content near line {self.index + 1}") - return value - - def _parse_block(self, indent: int) -> Any: - self._skip_ignorable() - if self.index >= len(self.lines): - return {} - - cur_indent = self._indent(self.lines[self.index]) - if cur_indent < indent: - return {} - if cur_indent > indent: - indent = cur_indent - - content = self.lines[self.index][indent:] - if content == "-" or content.startswith("- "): - return self._parse_list(indent) - return self._parse_map(indent) - - def _parse_map(self, indent: int) -> dict[str, Any]: - out: dict[str, Any] = {} - while True: - self._skip_ignorable() - if self.index >= len(self.lines): - break - - line = self.lines[self.index] - cur_indent = self._indent(line) - if cur_indent < indent: - break - if cur_indent > indent: - raise ValueError(f"unexpected indentation at line {self.index + 1}") - - content = line[indent:] - if content == "-" or content.startswith("- "): - break - if ":" not in content: - raise ValueError(f"invalid mapping entry at line {self.index + 1}") - - key, raw_rest = content.split(":", 1) - key = key.strip() - rest = _strip_yaml_inline_comment(raw_rest).strip() - self.index += 1 - - if not key: - raise ValueError(f"empty mapping key at line {self.index}") - - if rest in {"|", "|-", ">", ">-"}: - out[key] = self._parse_block_scalar(indent + 2) - elif rest == "": - out[key] = self._parse_nested(indent + 2) - else: - out[key] = 
_parse_simple_yaml_scalar(rest) - - return out - - def _parse_nested(self, expected_indent: int) -> Any: - self._skip_ignorable() - if self.index >= len(self.lines): - return None - - cur_indent = self._indent(self.lines[self.index]) - if cur_indent < expected_indent: - return None - if cur_indent > expected_indent: - expected_indent = cur_indent - - content = self.lines[self.index][expected_indent:] - if content == "-" or content.startswith("- "): - return self._parse_list(expected_indent) - return self._parse_map(expected_indent) - - def _parse_list(self, indent: int) -> list[Any]: - out: list[Any] = [] - while True: - self._skip_ignorable() - if self.index >= len(self.lines): - break - - line = self.lines[self.index] - cur_indent = self._indent(line) - if cur_indent < indent: - break - if cur_indent > indent: - raise ValueError(f"unexpected indentation at line {self.index + 1}") - - content = line[indent:] - if not (content == "-" or content.startswith("- ")): - break - - rest = "" if content == "-" else _strip_yaml_inline_comment(content[2:]).strip() - self.index += 1 - - if rest in {"|", "|-", ">", ">-"}: - out.append(self._parse_block_scalar(indent + 2)) - continue - - if rest == "": - out.append(self._parse_nested(indent + 2)) - continue - - inline_map_match = re.match(r"^([A-Za-z0-9_.-]+):(?:\s+|$)(.*)$", rest) - if inline_map_match: - key = inline_map_match.group(1).strip() - tail = _strip_yaml_inline_comment(inline_map_match.group(2)).strip() - item: dict[str, Any] = {} - if tail in {"|", "|-", ">", ">-"}: - item[key] = self._parse_block_scalar(indent + 4) - elif tail == "": - item[key] = self._parse_nested(indent + 4) - else: - item[key] = _parse_simple_yaml_scalar(tail) - for extra_key, extra_val in self._parse_map(indent + 2).items(): - item[extra_key] = extra_val - out.append(item) - continue - - out.append(_parse_simple_yaml_scalar(rest)) - - return out - - def _parse_block_scalar(self, indent: int) -> str: - lines: list[str] = [] - while self.index 
< len(self.lines): - raw = self.lines[self.index] - if raw.strip() == "": - lines.append("") - self.index += 1 - continue - - cur_indent = self._indent(raw) - if cur_indent < indent: - break - - lines.append(raw[indent:]) - self.index += 1 - - while lines and lines[-1] == "": - lines.pop() - return "\n".join(lines) - - -def _parse_simple_yaml(text: str) -> Any: - return _SimpleYamlParser(text).parse() - - -def _stable(obj: Any) -> str: - return json.dumps(obj, sort_keys=True, separators=(",", ":")) - - -def _unique(values: list[str]) -> list[str]: - seen: set[str] = set() - out: list[str] = [] - for v in values: - s = str(v).strip() - if not s or s in seen: - continue - seen.add(s) - out.append(s) - return out - - -def _ensure_dict(parent: dict[str, Any], key: str) -> dict[str, Any]: - cur = parent.get(key) - if isinstance(cur, dict): - return cur - parent[key] = {} - return parent[key] - - -def sync_profile_with_repo( - raw_profile: dict[str, Any], - repo_root: Path, - *, - prune_autogen: bool, -) -> tuple[dict[str, Any], bool]: - before = _stable(raw_profile) - profile: dict[str, Any] = json.loads(json.dumps(raw_profile)) - inferred = build_bootstrap_profile(repo_root) - - profile_meta = _ensure_dict(profile, "profile_meta") - autogen = _ensure_dict(profile_meta, "autogen") - prev_autogen_rules = to_string_list(autogen.get("rules_include"), []) - prev_autogen_domains = to_string_list(autogen.get("domains"), []) - prev_prompt_raw = autogen.get("prompt_by_domain") - prev_autogen_prompt_map: dict[str, str] = {} - if isinstance(prev_prompt_raw, dict): - for key, value in prev_prompt_raw.items(): - k = str(key).strip() - if not k: - continue - prev_autogen_prompt_map[k] = str(value) - - repo = _ensure_dict(profile, "repo") - if not str(repo.get("name", "")).strip(): - repo["name"] = inferred["repo"]["name"] - - review = _ensure_dict(profile, "review") - if not str(review.get("default_base", "")).strip(): - review["default_base"] = inferred["review"]["default_base"] - 
if "strict_gate" not in review: - review["strict_gate"] = True - if "depth_hotspots" not in review: - review["depth_hotspots"] = 3 - if not str(review.get("output_root", "")).strip(): - review["output_root"] = ".codex/review-runs" - - rules = _ensure_dict(profile, "rules") - existing_patterns = to_string_list(rules.get("include"), []) - inferred_patterns = to_string_list(inferred["rules"]["include"], []) - if prune_autogen and prev_autogen_rules: - prev_rule_set = set(prev_autogen_rules) - existing_patterns = [p for p in existing_patterns if p not in prev_rule_set] - rules["include"] = _unique(existing_patterns + inferred_patterns) - - domains = _ensure_dict(profile, "domains") - existing_allowed = to_string_list(domains.get("allowed"), []) - existing_default = to_string_list(domains.get("default"), []) - inferred_domains = to_string_list(inferred["domains"]["default"], ["core"]) - if prune_autogen and prev_autogen_domains: - prev_domain_set = set(prev_autogen_domains) - existing_allowed = [d for d in existing_allowed if d not in prev_domain_set] - existing_default = [d for d in existing_default if d not in prev_domain_set] - - merged_allowed = _unique(existing_allowed + inferred_domains) - merged_default = _unique(existing_default + inferred_domains) - merged_default = [d for d in merged_default if d in set(merged_allowed)] - if not merged_allowed: - merged_allowed = ["core"] - if not merged_default: - merged_default = ["core"] - - domains["allowed"] = merged_allowed - domains["default"] = merged_default - - prompts = _ensure_dict(profile, "prompts") - if not str(prompts.get("global", "")).strip(): - prompts["global"] = inferred["prompts"]["global"] - - by_domain = prompts.get("by_domain") - if not isinstance(by_domain, dict): - by_domain = {} - - inferred_by_domain = inferred["prompts"]["by_domain"] - new_autogen_prompt_map = dict(prev_autogen_prompt_map) - for domain in merged_allowed: - if domain not in inferred_by_domain: - continue - inferred_prompt = 
inferred_by_domain[domain] - existing_prompt = str(by_domain.get(domain, "")).strip() - prev_prompt = str(prev_autogen_prompt_map.get(domain, "")).strip() - if not existing_prompt: - by_domain[domain] = inferred_prompt - elif prev_prompt and existing_prompt == prev_prompt and existing_prompt != inferred_prompt: - by_domain[domain] = inferred_prompt - new_autogen_prompt_map[domain] = inferred_prompt - - if prune_autogen: - for domain in list(by_domain.keys()): - if domain in inferred_by_domain: - continue - prev_prompt = str(prev_autogen_prompt_map.get(domain, "")).strip() - current_prompt = str(by_domain.get(domain, "")).strip() - if prev_prompt and current_prompt == prev_prompt: - del by_domain[domain] - new_autogen_prompt_map.pop(domain, None) - - prompts["by_domain"] = by_domain - - pipeline = _ensure_dict(profile, "pipeline") - inferred_pipeline = inferred.get("pipeline") - if isinstance(inferred_pipeline, dict): - for key, value in inferred_pipeline.items(): - if key not in pipeline: - pipeline[key] = value - existing_core_passes = pipeline.get("core_passes") - if not isinstance(existing_core_passes, list) or not existing_core_passes: - pipeline["core_passes"] = inferred_pipeline.get("core_passes", []) - - if "version" not in profile: - profile["version"] = 1 - - after_without_meta = _stable(profile) - changed = before != after_without_meta - - if prune_autogen: - autogen["rules_include"] = inferred_patterns - autogen["domains"] = inferred_domains - autogen["prompt_by_domain"] = { - domain: prompt - for domain, prompt in new_autogen_prompt_map.items() - if domain in inferred_by_domain - } - else: - autogen["rules_include"] = _unique(prev_autogen_rules + inferred_patterns) - autogen["domains"] = _unique(prev_autogen_domains + inferred_domains) - preserved_prompt_map = dict(prev_autogen_prompt_map) - for domain, prompt in inferred_by_domain.items(): - preserved_prompt_map[domain] = prompt - autogen["prompt_by_domain"] = preserved_prompt_map - - meta = 
_ensure_dict(profile, "profile_meta") - if changed: - meta["managed_by"] = "codexw" - meta["last_synced_utc"] = dt.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") - meta["sync_mode"] = "merge+prune" if prune_autogen else "merge" - - final_changed = before != _stable(profile) - return profile, final_changed - - -def load_yaml_or_json(path: Path) -> dict[str, Any]: - text = path.read_text(encoding="utf-8") - - try: - import yaml # type: ignore - - data = yaml.safe_load(text) - if not isinstance(data, dict): - die(f"profile at {path} must be a mapping/object") - return data - except ModuleNotFoundError: - pass - except Exception as exc: - die(f"invalid YAML in {path}: {exc}") - - try: - data = json.loads(text) - except json.JSONDecodeError: - try: - data = _parse_simple_yaml(text) - except ValueError as exc: - die( - "PyYAML not available and profile parsing failed. " - "Install PyYAML (python3 -m pip install pyyaml) or provide supported YAML/JSON syntax. " - f"Details: {exc}" - ) - if not isinstance(data, dict): - die(f"profile at {path} must be a mapping/object") - return data - - -def to_bool(value: Any, default: bool) -> bool: - if value is None: - return default - if isinstance(value, bool): - return value - if isinstance(value, str): - norm = value.strip().lower() - if norm in {"1", "true", "yes", "on"}: - return True - if norm in {"0", "false", "no", "off"}: - return False - return default - - -def to_int(value: Any, default: int) -> int: - if value is None: - return default - try: - parsed = int(value) - return parsed if parsed >= 0 else default - except (TypeError, ValueError): - return default - - -def to_string_list(value: Any, default: list[str] | None = None) -> list[str]: - if value is None: - return list(default or []) - if isinstance(value, list): - return [str(x).strip() for x in value if str(x).strip()] - if isinstance(value, str): - return [x.strip() for x in value.split(",") if x.strip()] - return list(default or []) - - -def 
to_nonempty_string(value: Any, default: str) -> str: - if isinstance(value, str): - text = value.strip() - return text if text else default - return default - - -def normalize_profile(raw: dict[str, Any]) -> dict[str, Any]: - repo = raw.get("repo") or {} - review = raw.get("review") or {} - rules = raw.get("rules") or {} - domains = raw.get("domains") or {} - prompts = raw.get("prompts") or {} - pipeline = raw.get("pipeline") or {} - - if not isinstance(repo, dict): - repo = {} - if not isinstance(review, dict): - review = {} - if not isinstance(rules, dict): - rules = {} - if not isinstance(domains, dict): - domains = {} - if not isinstance(prompts, dict): - prompts = {} - if not isinstance(pipeline, dict): - pipeline = {} - - allowed_domains = to_string_list(domains.get("allowed"), ["core"]) - default_domains = to_string_list(domains.get("default"), allowed_domains) - if not allowed_domains: - allowed_domains = ["core"] - if not default_domains: - default_domains = list(allowed_domains) - - domain_prompt_map = prompts.get("by_domain") - if not isinstance(domain_prompt_map, dict): - domain_prompt_map = {} - - default_pipeline = default_pipeline_config() - pipeline_core_raw = pipeline.get("core_passes") - if not isinstance(pipeline_core_raw, list) or not pipeline_core_raw: - pipeline_core_raw = default_pipeline["core_passes"] - - pipeline_core_passes: list[dict[str, str]] = [] - for idx, raw_pass in enumerate(pipeline_core_raw, start=1): - if not isinstance(raw_pass, dict): - continue - pass_id = str(raw_pass.get("id", f"core-pass-{idx}")).strip() or f"core-pass-{idx}" - pass_name = str(raw_pass.get("name", pass_id)).strip() or pass_id - instructions = str(raw_pass.get("instructions", "")).strip() - if not instructions: - continue - pipeline_core_passes.append( - { - "id": pass_id, - "name": pass_name, - "instructions": instructions, - } - ) - - if not pipeline_core_passes: - pipeline_core_passes = json.loads(json.dumps(default_pipeline["core_passes"])) - - return 
{ - "version": str(raw.get("version", "1")), - "repo_name": to_nonempty_string(repo.get("name"), "Repository"), - "default_base": to_nonempty_string(review.get("default_base"), "main"), - "strict_gate": to_bool(review.get("strict_gate"), True), - "depth_hotspots": to_int(review.get("depth_hotspots"), 3), - "output_root": to_nonempty_string(review.get("output_root"), ".codex/review-runs"), - "rule_patterns": to_string_list(rules.get("include"), ["AGENTS.md", ".cursor/rules/**/*.mdc"]), - "default_domains": default_domains, - "allowed_domains": allowed_domains, - "global_prompt": str(prompts.get("global", "")).strip(), - "domain_prompts": { - str(k): str(v).strip() for k, v in domain_prompt_map.items() if str(v).strip() - }, - "pipeline": { - "include_policy_pass": to_bool( - pipeline.get("include_policy_pass"), - to_bool(default_pipeline.get("include_policy_pass"), True), - ), - "include_core_passes": to_bool( - pipeline.get("include_core_passes"), - to_bool(default_pipeline.get("include_core_passes"), True), - ), - "include_domain_passes": to_bool( - pipeline.get("include_domain_passes"), - to_bool(default_pipeline.get("include_domain_passes"), True), - ), - "include_depth_passes": to_bool( - pipeline.get("include_depth_passes"), - to_bool(default_pipeline.get("include_depth_passes"), True), - ), - "policy_instructions": str( - pipeline.get("policy_instructions", default_pipeline["policy_instructions"]) - ).strip() - or default_pipeline["policy_instructions"], - "core_passes": pipeline_core_passes, - "depth_instructions": str( - pipeline.get("depth_instructions", default_pipeline["depth_instructions"]) - ).strip() - or default_pipeline["depth_instructions"], - }, - } - - -def discover_rule_files(repo_root: Path, patterns: list[str]) -> list[str]: - matches: set[str] = set() - for pattern in patterns: - expanded = glob.glob(str(repo_root / pattern), recursive=True) - for abs_path in expanded: - p = Path(abs_path) - if not p.is_file(): - continue - try: - rel = 
p.relative_to(repo_root) - except ValueError: - continue - matches.add(str(rel)) - return sorted(matches) - - -def validate_rule_patterns(repo_root: Path, patterns: list[str]) -> tuple[list[str], list[str]]: - valid_patterns: list[str] = [] - warnings: list[str] = [] - for pattern in patterns: - normalized = str(pattern).strip() - if not normalized: - continue - matches = discover_rule_files(repo_root, [normalized]) - if matches: - valid_patterns.append(normalized) - continue - if any(ch in normalized for ch in "*?[]"): - warnings.append(f"rule pattern '{normalized}' matched no files") - else: - warnings.append(f"rule file '{normalized}' not found") - return valid_patterns, warnings - - -def collect_changed_files(repo_root: Path, mode: str, base: str, commit: str) -> list[str]: - if mode == "base": - out = run_checked(["git", "diff", "--name-only", f"{base}...HEAD"], repo_root) - return sorted({line.strip() for line in out.splitlines() if line.strip()}) - if mode == "uncommitted": - out1 = run_checked(["git", "diff", "--name-only", "HEAD"], repo_root) - out2 = run_checked(["git", "ls-files", "--others", "--exclude-standard"], repo_root) - return sorted({line.strip() for line in (out1 + "\n" + out2).splitlines() if line.strip()}) - if mode == "commit": - out = run_checked(["git", "show", "--name-only", "--pretty=", commit], repo_root) - return sorted({line.strip() for line in out.splitlines() if line.strip()}) - die(f"unsupported mode: {mode}") - return [] - - -def collect_numstat(repo_root: Path, mode: str, base: str, commit: str) -> list[tuple[int, str]]: - if mode == "base": - cmd = ["git", "diff", "--numstat", f"{base}...HEAD"] - elif mode == "uncommitted": - cmd = ["git", "diff", "--numstat", "HEAD"] - elif mode == "commit": - cmd = ["git", "show", "--numstat", "--pretty=", commit] - else: - die(f"unsupported mode: {mode}") - return [] - - out = run_checked(cmd, repo_root) - rows: list[tuple[int, str]] = [] - for raw in out.splitlines(): - parts = 
raw.split("\t") - if len(parts) < 3: - continue - add_raw, del_raw, path = parts[0], parts[1], parts[2] - add = int(add_raw) if add_raw.isdigit() else 0 - rem = int(del_raw) if del_raw.isdigit() else 0 - rows.append((add + rem, path)) - rows.sort(key=lambda x: x[0], reverse=True) - return rows - - -def changed_modules(changed_files: list[str]) -> list[tuple[int, str]]: - counts: dict[str, int] = {} - for path in changed_files: - parts = path.split("/") - key = "/".join(parts[:2]) if len(parts) >= 2 else parts[0] - counts[key] = counts.get(key, 0) + 1 - rows = [(count, module) for module, count in counts.items()] - rows.sort(key=lambda x: (-x[0], x[1])) - return rows - - -def pass_has_no_findings(text: str, parsed_findings: list[dict[str, Any]] | None = None) -> bool: - if NO_FINDINGS_SENTINEL not in text: - return False - if parsed_findings is None: - parsed_findings = parse_findings_from_pass(text, "probe") - return len(parsed_findings) == 0 - - -def rule_block(rule_files: list[str]) -> str: - if not rule_files: - return "Required standards files (read and enforce strictly):\n- (none discovered)" - lines = ["Required standards files (read and enforce strictly):"] - lines.extend([f"- {rule}" for rule in rule_files]) - return "\n".join(lines) - - -def build_diff_context(changed_files: list[str], modules: list[tuple[int, str]], hotspots: list[str]) -> str: - mod_lines = "\n".join([f"- {m} ({c} files)" for c, m in modules]) or "- (none)" - hot_lines = "\n".join([f"- {h}" for h in hotspots]) or "- (none)" - file_lines = "\n".join([f"- {f}" for f in changed_files]) or "- (none)" - return ( - "Change context for breadth/depth coverage:\n" - f"- Changed files count: {len(changed_files)}\n" - "- Changed modules:\n" - f"{mod_lines}\n" - "- Top hotspots (by changed lines):\n" - f"{hot_lines}\n" - "- Changed files:\n" - f"{file_lines}" + from codexw.utils import CodexwError, shutil_which +else: + from .cli import build_parser + from .git import ( + changed_modules, + 
collect_changed_files, + collect_numstat, + find_repo_root, ) - - -def domain_prompt(domain: str, profile: dict[str, Any]) -> str: - custom = profile["domain_prompts"].get(domain, "") - base = ( - f"Domain focus: {domain}\n" - f"- identify domain-specific correctness and policy violations for '{domain}'\n" - "- prioritize regressions and production-risk behavior in changed code" + from .passes import PassBuilder, PassRunner + from .profile import ( + build_bootstrap_profile, + discover_rule_files, + load_profile, + normalize_profile, + sync_profile_with_repo, + validate_rule_patterns, + write_profile, ) - return base + ("\n" + custom if custom else "") - - -def sanitize_pass_id(value: str) -> str: - return re.sub(r"[^a-zA-Z0-9_-]", "-", value.strip()).strip("-") or "pass" - - -def extract_line_number(raw: str) -> int | None: - match = re.search(r"\d+", raw) - if not match: - return None - try: - number = int(match.group(0)) - except ValueError: - return None - return number if number > 0 else None - - -def normalize_finding_line(raw_line: str) -> str: - line = raw_line.strip() - if not line: - return "" - - line = re.sub(r"^[-*+]\s*", "", line) - line = re.sub(r"^\d+[.)]\s*", "", line) - line = re.sub(r"^\*\*([^*]+)\*\*\s*", r"\1 ", line) - line = re.sub(r"^__([^_]+)__\s*", r"\1 ", line) - line = re.sub(r"^`([^`]+)`\s*", r"\1 ", line) - line = re.sub(r"\s+:\s*", ": ", line, count=1) - return line - - -def parse_findings_from_pass(text: str, pass_id: str) -> list[dict[str, Any]]: - findings: list[dict[str, Any]] = [] - current: dict[str, Any] | None = None - - def flush() -> None: - nonlocal current - if not current: - return - severity = str(current.get("severity", "")).strip().upper() - file_path = str(current.get("file_path", "")).strip() - if severity and file_path: - current["pass_id"] = pass_id - current["line"] = extract_line_number(str(current.get("line_raw", ""))) - findings.append(current) - current = None - - for raw_line in text.splitlines(): - line = 
normalize_finding_line(raw_line) - if not line: - continue - if NO_FINDINGS_SENTINEL in line: - continue - - severity_match = re.match(r"(?i)^severity\s*:\s*(P[0-3])\b", line) - if severity_match: - flush() - current = { - "severity": severity_match.group(1).upper(), - "type": "", - "file_path": "", - "line_raw": "", - "rule": "", - "risk": "", - "fix": "", - "title": "", - } - continue - - if not current: - continue - - if re.match(r"(?i)^type\s*:", line): - current["type"] = line.split(":", 1)[1].strip() - elif re.match(r"(?i)^(file\s*path|path|file)\s*:", line): - current["file_path"] = line.split(":", 1)[1].strip() - elif re.match(r"(?i)^(line|line\s*number|precise line number|line range)\s*:", line): - current["line_raw"] = line.split(":", 1)[1].strip() - elif re.match(r"(?i)^violated rule", line): - current["rule"] = line.split(":", 1)[1].strip() - elif re.match(r"(?i)^why this is risky\s*:", line): - current["risk"] = line.split(":", 1)[1].strip() - elif re.match(r"(?i)^minimal fix direction\s*:", line): - current["fix"] = line.split(":", 1)[1].strip() - elif re.match(r"(?i)^title\s*:", line): - current["title"] = line.split(":", 1)[1].strip() - else: - if current.get("risk"): - current["risk"] = f"{current['risk']} {line}".strip() - - flush() - return findings + from .reporting import ( + write_combined_report, + write_empty_report, + write_findings_json, + write_support_files, + ) + from .utils import CodexwError, shutil_which -def run_review(args: argparse.Namespace) -> int: +def run_review(args) -> int: + """Execute the review workflow.""" repo_root = find_repo_root(Path.cwd()) os.chdir(repo_root) + # Resolve profile path profile_path = Path(args.profile or "local-review-profile.yaml") if not profile_path.is_absolute(): profile_path = repo_root / profile_path + # Bootstrap profile if missing if not profile_path.exists(): if args.no_bootstrap_profile: - die( + raise CodexwError( f"profile not found: {profile_path}. 
" - "Add local-review-profile.yaml in repository root or pass --profile." + "Add local-review-profile.yaml or pass --profile." ) bootstrap_profile = build_bootstrap_profile(repo_root) write_profile(profile_path, bootstrap_profile) @@ -1527,21 +104,19 @@ def run_review(args: argparse.Namespace) -> int: except ValueError: profile_display = str(profile_path) print( - f"Generated {profile_display} automatically from repository signals. " - "Review and commit it.", + f"Generated {profile_display} from repository signals. Review and commit it.", file=sys.stderr, ) if not profile_path.exists(): - die( - f"profile not found: {profile_path}. " - "Add local-review-profile.yaml in repository root or pass --profile." - ) + raise CodexwError(f"profile not found: {profile_path}") + # Validate sync options if args.sync_profile_only and args.no_sync_profile: - die("--sync-profile-only cannot be combined with --no-sync-profile") + raise CodexwError("--sync-profile-only cannot be combined with --no-sync-profile") - raw_profile = load_yaml_or_json(profile_path) + # Load and sync profile + raw_profile = load_profile(profile_path) if args.no_sync_profile: synced_profile = raw_profile else: @@ -1557,27 +132,19 @@ def run_review(args: argparse.Namespace) -> int: except ValueError: profile_display = str(profile_path) print( - f"Synchronized {profile_display} from repository signals " - f"(prune_autogen={'on' if not args.no_prune_autogen else 'off'}).", + f"Synchronized {profile_display} from repository signals.", file=sys.stderr, ) profile = normalize_profile(synced_profile) - resolved_rule_patterns, rule_pattern_warnings = validate_rule_patterns( - repo_root, - profile["rule_patterns"], - ) - for warning in rule_pattern_warnings: + # Validate rule patterns + resolved_patterns, warnings = validate_rule_patterns(repo_root, profile["rule_patterns"]) + for warning in warnings: print(f"warning: {warning}", file=sys.stderr) - if profile["rule_patterns"] and not resolved_rule_patterns: - print( 
- "warning: no enforceable rule files were resolved from profile rule patterns; " - "continuing without rule-file enforcement.", - file=sys.stderr, - ) - profile["rule_patterns"] = resolved_rule_patterns + profile["rule_patterns"] = resolved_patterns + # Handle print-effective-profile if args.print_effective_profile: print( json.dumps( @@ -1592,13 +159,16 @@ def run_review(args: argparse.Namespace) -> int: ) return 0 + # Handle bootstrap/sync-only modes if args.bootstrap_only or args.sync_profile_only: print(f"Profile ready: {profile_path}") return 0 + # Verify codex CLI is available if not shutil_which("codex"): - die("codex CLI not found in PATH") + raise CodexwError("codex CLI not found in PATH") + # Determine review mode mode = "base" base_branch = args.base or profile["default_base"] commit_sha = args.commit or "" @@ -1607,14 +177,19 @@ def run_review(args: argparse.Namespace) -> int: elif args.commit: mode = "commit" + # Determine gating mode fail_on_findings = profile["strict_gate"] if args.fail_on_findings: fail_on_findings = True if args.no_fail_on_findings: fail_on_findings = False - depth_hotspots = args.depth_hotspots if args.depth_hotspots is not None else profile["depth_hotspots"] + # Determine depth hotspots + depth_hotspots = ( + args.depth_hotspots if args.depth_hotspots is not None else profile["depth_hotspots"] + ) + # Validate domains allowed_domains = profile["allowed_domains"] default_domains = profile["default_domains"] if args.domains: @@ -1624,16 +199,19 @@ def run_review(args: argparse.Namespace) -> int: unknown = [d for d in selected_domains if d not in allowed_domains] if unknown: - die(f"invalid domain(s): {', '.join(unknown)}. Allowed: {', '.join(allowed_domains)}") + raise CodexwError( + f"invalid domain(s): {', '.join(unknown)}. 
Allowed: {', '.join(allowed_domains)}" + ) + # Setup output directory ts = dt.datetime.now().strftime("%Y%m%d-%H%M%S") output_root = Path(args.output_dir) if args.output_dir else Path(profile["output_root"]) / ts if not output_root.is_absolute(): output_root = repo_root / output_root output_root.mkdir(parents=True, exist_ok=True) + # Build target arguments for codex CLI target_args: list[str] = [] - target_desc: str if mode == "base": target_args += ["--base", base_branch] target_desc = f"base branch: {base_branch}" @@ -1648,265 +226,86 @@ def run_review(args: argparse.Namespace) -> int: target_args += ["--title", args.title] model_override = args.model or "" - if model_override: - target_args += ["-c", f'model="{model_override}"'] + # Discover rule files rule_files = discover_rule_files(repo_root, profile["rule_patterns"]) - (output_root / "enforced-rule-files.txt").write_text( - "\n".join(rule_files) + ("\n" if rule_files else ""), - encoding="utf-8", - ) + # Collect changed files changed_files = collect_changed_files(repo_root, mode, base_branch, commit_sha) - (output_root / "changed-files.txt").write_text( - "\n".join(changed_files) + ("\n" if changed_files else ""), - encoding="utf-8", - ) - modules = changed_modules(changed_files) - (output_root / "changed-modules.txt").write_text( - "\n".join([f"{count}\t{module}" for count, module in modules]) + ("\n" if modules else ""), - encoding="utf-8", - ) + # Collect hotspots numstat = collect_numstat(repo_root, mode, base_branch, commit_sha) - hotspots = [path for _, path in numstat[: depth_hotspots if depth_hotspots > 0 else 0]] - (output_root / "hotspots.txt").write_text( - "\n".join(hotspots) + ("\n" if hotspots else ""), - encoding="utf-8", - ) + hotspots = [path for _, path in numstat[:depth_hotspots] if depth_hotspots > 0] + # Handle empty diff if not changed_files: combined_report = output_root / "combined-report.md" - combined_report.write_text( - "\n".join( - [ - "# Codex PR-Grade Multi-Pass Review", - "", 
- f"- Generated: {dt.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%SZ')}", - f"- Repository context: {profile['repo_name']}", - f"- Target: {target_desc}", - f"- Domains: {','.join(selected_domains)}", - "- Changed files: 0", - "", - "No files detected for selected target.", - ] - ) - + "\n", - encoding="utf-8", - ) + write_empty_report(combined_report, profile, target_desc, selected_domains) print("No files detected for selected target.") print(f"Combined report: {combined_report}") return 0 - base_rubric = ( - f"Act as a strict PR gate reviewer for {profile['repo_name']}.\n" - "Return only actionable findings.\n\n" - "Enforcement order:\n" - "- AGENTS.md instructions\n" - "- Domain-specific internal rule files listed below\n" - "- Engineering correctness and risk\n\n" - "For each finding include:\n" - "- Severity: P0, P1, P2, or P3\n" - "- Type: Bug | Regression | Security | Concurrency | TestGap | RuleViolation\n" - "- File path\n" - "- Precise line number or tight line range\n" - "- Violated rule and rule file path (when applicable)\n" - "- Why this is risky\n" - "- Minimal fix direction\n\n" - "Do not output style-only comments unless they violate a required internal rule.\n" - f'If no findings, output exactly: "{NO_FINDINGS_SENTINEL}".' 
+ # Build passes + pass_builder = PassBuilder( + profile=profile, + rule_files=rule_files, + changed_files=changed_files, + modules=modules, + hotspots=hotspots, + selected_domains=selected_domains, ) - - global_prompt = profile.get("global_prompt", "") - diff_context = build_diff_context(changed_files, modules, hotspots) - rules_block = rule_block(rule_files) - - def pass_prompt(extra: str) -> str: - parts = [base_rubric, rules_block, diff_context] - if global_prompt: - parts.append("Profile global context:\n" + global_prompt) - parts.append(extra) - return "\n\n".join([p for p in parts if p.strip()]) - - pipeline = profile["pipeline"] - passes: list[tuple[str, str, str]] = [] - pass_counter = 0 - - if pipeline.get("include_policy_pass", True): - pass_counter += 1 - passes.append( - ( - f"pass-{pass_counter}-policy-sweep", - "Policy: full standards coverage sweep", - pass_prompt(str(pipeline.get("policy_instructions", ""))), - ) - ) - - if pipeline.get("include_core_passes", True) and "core" in selected_domains: - core_passes = pipeline.get("core_passes") or [] - for core_pass in core_passes: - pass_id = sanitize_pass_id(str(core_pass.get("id", "core-pass"))) - pass_name = str(core_pass.get("name", pass_id)).strip() or pass_id - instructions = str(core_pass.get("instructions", "")).strip() - if not instructions: - continue - pass_counter += 1 - passes.append( - ( - f"pass-{pass_counter}-{pass_id}", - pass_name, - pass_prompt(instructions), - ) - ) - - if pipeline.get("include_domain_passes", True): - for domain in selected_domains: - if domain == "core": - continue - pass_counter += 1 - slug = sanitize_pass_id(domain) - passes.append( - ( - f"pass-{pass_counter}-domain-{slug}", - f"Domain: {domain}", - pass_prompt(domain_prompt(domain, profile)), - ) - ) - - if pipeline.get("include_depth_passes", True): - depth_template = str(pipeline.get("depth_instructions", DEFAULT_DEPTH_PASS_INSTRUCTIONS)) - for hotspot in hotspots: - pass_counter += 1 - hotspot_slug = 
sanitize_pass_id(hotspot.replace("/", "_")) - try: - depth_instructions = depth_template.format(hotspot=hotspot) - except Exception: - depth_instructions = DEFAULT_DEPTH_PASS_INSTRUCTIONS.format(hotspot=hotspot) - passes.append( - ( - f"pass-{pass_counter}-depth-{hotspot_slug}", - f"Depth hotspot: {hotspot}", - pass_prompt(depth_instructions), - ) - ) + passes = pass_builder.build_passes() if not passes: - die("no review passes configured; check profile.pipeline settings") - - summary_lines: list[str] = [] - raw_findings: list[dict[str, Any]] = [] - - for index, (pass_id, pass_name, prompt) in enumerate(passes, start=1): - out_file = output_root / f"{pass_id}.md" - print(f"\n==> ({index}/{len(passes)}) {pass_name}") - run_review_pass_with_compat( - repo_root=repo_root, - out_file=out_file, - target_args=target_args, - target_desc=target_desc, - prompt=prompt, - pass_name=pass_name, - ) - - text = out_file.read_text(encoding="utf-8", errors="replace") - parsed = parse_findings_from_pass(text, pass_id) - no_findings = pass_has_no_findings(text, parsed) - if not no_findings and not parsed: - parsed = [ - { - "severity": "P2", - "type": "UnparsedFinding", - "file_path": "(unparsed-output)", - "line_raw": "", - "line": None, - "rule": "", - "risk": "Pass output contained findings but did not match structured schema.", - "fix": "Ensure findings follow the required schema with Severity/Type/File path/Line fields.", - "title": pass_name, - "pass_id": pass_id, - } - ] - - if no_findings: - summary_lines.append(f"- [PASS] {pass_name}") - else: - summary_lines.append(f"- [FINDINGS] {pass_name}") - raw_findings.extend(parsed) - - (output_root / "pass-status.md").write_text("\n".join(summary_lines) + "\n", encoding="utf-8") + raise CodexwError("no review passes configured; check profile.pipeline settings") + + # Run passes + pass_runner = PassRunner( + repo_root=repo_root, + output_root=output_root, + target_args=target_args, + target_desc=target_desc, + 
model_override=model_override or None, + ) + summary_lines, raw_findings = pass_runner.run_all(passes) + + # Write support files + write_support_files( + output_root=output_root, + rule_files=rule_files, + changed_files=changed_files, + modules=modules, + hotspots=hotspots, + summary_lines=summary_lines, + ) + # Write findings JSON findings_json = output_root / "findings.json" - findings_json.write_text( - json.dumps( - { - "generated_utc": dt.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"), - "target": target_desc, - "counts": { - "active": len(raw_findings), - }, - "active_findings": raw_findings, - }, - indent=2, - ) - + "\n", - encoding="utf-8", - ) + write_findings_json(findings_json, target_desc, raw_findings) + # Write combined report combined_report = output_root / "combined-report.md" - with combined_report.open("w", encoding="utf-8") as fh: - try: - profile_display = str(profile_path.relative_to(repo_root)) - except ValueError: - profile_display = str(profile_path) - - fh.write("# Codex PR-Grade Multi-Pass Review\n\n") - fh.write(f"- Generated: {dt.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%SZ')}\n") - fh.write(f"- Repository context: {profile['repo_name']}\n") - fh.write(f"- Target: {target_desc}\n") - fh.write(f"- Domains: {','.join(selected_domains)}\n") - fh.write(f"- Auto-enforced rule files: {len(rule_files)}\n") - fh.write(f"- Changed files: {len(changed_files)}\n") - fh.write(f"- Depth hotspots: {depth_hotspots}\n") - if args.title: - fh.write(f"- Title: {args.title}\n") - if model_override: - fh.write(f"- Model override: {model_override}\n") - fh.write(f"- Pass count: {len(passes)}\n") - fh.write(f"- Profile file: {profile_display}\n\n") - - fh.write("## Findings Summary\n\n") - fh.write(f"- Active findings: {len(raw_findings)}\n") - fh.write(f"- JSON artifact: {findings_json}\n\n") - - fh.write("## Pass Status\n\n") - fh.write("\n".join(summary_lines) + "\n\n") - - fh.write("## Auto-Enforced Rule Files\n\n") - if rule_files: - 
fh.write("\n".join(rule_files) + "\n\n") - else: - fh.write("(none discovered)\n\n") - - fh.write("## Changed Modules\n\n") - if modules: - fh.write("\n".join([f"{count}\t{module}" for count, module in modules]) + "\n\n") - else: - fh.write("(none)\n\n") - - fh.write("## Changed Files\n\n") - fh.write("\n".join(changed_files) + "\n\n") - - fh.write("## Hotspots\n\n") - fh.write(("\n".join(hotspots) if hotspots else "(none)") + "\n\n") - - for pass_file in sorted(output_root.glob("pass-*.md")): - fh.write(f"## {pass_file.stem}\n\n") - pass_text = pass_file.read_text(encoding="utf-8") - fh.write(pass_text) - if not pass_text.endswith("\n"): - fh.write("\n") - fh.write("\n") + write_combined_report( + path=combined_report, + profile=profile, + profile_path=profile_path, + repo_root=repo_root, + target_desc=target_desc, + selected_domains=selected_domains, + rule_files=rule_files, + changed_files=changed_files, + modules=modules, + hotspots=hotspots, + depth_hotspots=depth_hotspots, + pass_count=len(passes), + summary_lines=summary_lines, + raw_findings=raw_findings, + findings_json_path=findings_json, + output_root=output_root, + title=args.title, + model_override=model_override, + ) print("\nDone.") print(f"Per-pass outputs: {output_root}") @@ -1923,94 +322,20 @@ def pass_prompt(extra: str) -> str: return 0 -def shutil_which(name: str) -> str | None: - paths = os.environ.get("PATH", "").split(os.pathsep) - for directory in paths: - candidate = Path(directory) / name - if candidate.exists() and os.access(candidate, os.X_OK): - return str(candidate) - return None - - -def build_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - prog="codexw", - description="Generic, profile-aware Codex wrapper for local PR-grade review.", - ) - sub = parser.add_subparsers(dest="command") - - review = sub.add_parser( - "review", - help="Run profile-driven PR-grade multi-pass review.", - ) - review_pr = sub.add_parser( - "review-pr", - help="Alias for 'review' 
(kept for backward compatibility).", - ) - - def add_review_args(target_parser: argparse.ArgumentParser) -> None: - target_parser.add_argument("--profile", help="Path to local-review-profile.yaml", default=None) - mode = target_parser.add_mutually_exclusive_group() - mode.add_argument("--base", help="Base branch", default=None) - mode.add_argument("--uncommitted", action="store_true", help="Review uncommitted changes") - mode.add_argument("--commit", help="Review a specific commit SHA", default=None) - target_parser.add_argument("--domains", help="Comma-separated domain list", default=None) - target_parser.add_argument("--depth-hotspots", type=int, help="Number of hotspot depth passes") - target_parser.add_argument("--title", help="Optional review title", default=None) - target_parser.add_argument("--output-dir", help="Output directory for artifacts", default=None) - target_parser.add_argument("--model", help="Optional model override", default=None) - target_parser.add_argument( - "--print-effective-profile", - action="store_true", - help="Print normalized profile and exit (no review execution)", - ) - target_parser.add_argument( - "--bootstrap-only", - action="store_true", - help="Create missing profile (if needed) and exit", - ) - target_parser.add_argument( - "--sync-profile-only", - action="store_true", - help="Sync profile from repository signals and exit", - ) - target_parser.add_argument( - "--no-bootstrap-profile", - action="store_true", - help="Disable automatic profile generation when missing", - ) - target_parser.add_argument( - "--no-sync-profile", - action="store_true", - help="Disable automatic profile sync from repository signals", - ) - target_parser.add_argument( - "--no-prune-autogen", - action="store_true", - help="Keep stale auto-managed profile entries for this run", - ) - target_parser.add_argument("--fail-on-findings", action="store_true", help="Force strict gate") - target_parser.add_argument( - "--no-fail-on-findings", - 
action="store_true", - help="Exploratory mode; do not fail when findings exist", - ) - - add_review_args(review) - add_review_args(review_pr) - - return parser - - def main() -> int: + """Main entry point.""" parser = build_parser() args = parser.parse_args() - if args.command in {"review", "review-pr"}: - return run_review(args) - - parser.print_help() - return 1 + try: + if args.command in {"review", "review-pr"}: + return run_review(args) + + parser.print_help() + return 1 + except CodexwError as exc: + print(f"error: {exc}", file=sys.stderr) + return exc.code if __name__ == "__main__": diff --git a/codexw/architecture.md b/codexw/architecture.md new file mode 100644 index 0000000..0ec5ecb --- /dev/null +++ b/codexw/architecture.md @@ -0,0 +1,258 @@ +# Codexw Architecture + +This document describes the internal architecture of the `codexw` package — a profile-aware, multi-pass Codex CLI wrapper for local PR-grade code review. + +## Module Overview + +``` +┌──────────────────────────────────────────────────────────────┐ +│ __main__.py (orchestrator) │ +│ run_review() → build passes → run passes → write reports │ +└─────────────────────────────┬────────────────────────────────┘ + │ + ┌─────────────────────┼─────────────────────┐ + │ │ │ + ▼ ▼ ▼ + ┌─────────┐ ┌──────────┐ ┌───────────┐ + │ git.py │ │ passes.py│ │ profile.py│ + │ (changes)│ │ (execute)│ │ (config) │ + └─────────┘ └────┬─────┘ └───────────┘ + │ + ┌──────────────┼──────────────┐ + │ │ │ + ▼ ▼ ▼ + ┌──────────┐ ┌───────────┐ ┌───────────────┐ + │ RetryStr │ │ ModelFall │ │ PassBuilder │ + │ ategy │ │ backState │ │ PassRunner │ + └──────────┘ └───────────┘ └───────────────┘ +``` + +| Module | Responsibility | +| ------------------- | ------------------------------------- | +| `__init__.py` | Public API exports | +| `__main__.py` | Entry point, orchestration | +| `cli.py` | Argument parsing | +| `constants.py` | Default configs and constants | +| `utils.py` | Shared helpers, `CodexwError` | +| `git.py` | 
Git operations (changes, numstat) | +| `profile.py` | Profile load/sync/normalize/write | +| `passes.py` | Pass execution, model/effort fallback | +| `prompts.py` | Prompt construction | +| `finding_parser.py` | Extract findings from output | +| `reporting.py` | Write reports and artifacts | +| `yaml_fallback.py` | Fallback YAML parser (no PyYAML) | +| `yaml_writer.py` | Fallback YAML writer (no PyYAML) | + +## Data Flow + +``` +┌─────────────────┐ +│ CLI Arguments │ +│ (--base, etc) │ +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ ┌─────────────────┐ +│ Profile File │────▶│ normalize_ │ +│ (YAML/JSON) │ │ profile() │ +└─────────────────┘ └────────┬────────┘ + │ + ┌───────────────────────┘ + ▼ +┌─────────────────┐ ┌─────────────────┐ +│ Git: collect │────▶│ PassBuilder │ +│ changed files │ │ .build_passes()│ +└─────────────────┘ └────────┬────────┘ + │ + ▼ + ┌─────────────────┐ + │ PassRunner │ + │ .run_all() │ + └────────┬────────┘ + │ + ┌───────────────────────┼───────────────────────┐ + │ │ │ + ▼ ▼ ▼ +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ codex review │ │ parse_findings │ │ write_combined │ +│ (per pass) │────▶│ _from_pass() │────▶│ _report() │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ +``` + +## Key Classes + +### PassSpec (dataclass, frozen) + +Immutable specification for a single review pass. + +```python +@dataclass(frozen=True) +class PassSpec: + id: str # Unique ID for filenames (e.g., "pass-1-policy-sweep") + name: str # Human-readable name (e.g., "Policy: full standards sweep") + prompt: str # Full prompt to send to Codex CLI +``` + +### ModelFallbackState (dataclass, mutable) + +Shared state across passes for model/effort resolution reuse. 
+ +```python +@dataclass +class ModelFallbackState: + preferred_model: str | None = None # User-requested model + selected_model: str | None = None # Resolved working model + selected_effort: str | None = None # Resolved working effort +``` + +### PassBuilder + +Constructs `PassSpec` objects from profile configuration. + +- Reads pipeline config (policy/core/domain/depth pass toggles) +- Builds prompt from base rubric + rules + diff context + pass-specific instructions +- Returns `list[PassSpec]` + +### PassRunner + +Executes passes and collects results. + +- Iterates through `PassSpec` list +- Calls `run_review_pass_with_compat()` for each +- Parses findings, builds summary +- Returns `(summary_lines, raw_findings)` + +### RetryStrategy + +Static methods for detecting retryable error conditions: + +| Method | Detects | +| -------------------------------- | ---------------------------------- | +| `should_retry_with_compat()` | Prompt+target flag incompatibility | +| `model_unavailable()` | Missing/inaccessible model | +| `reasoning_effort_unsupported()` | Invalid reasoning effort level | + +## Resilience Strategy + +### Model Fallback Chain + +When `model_unavailable()` is detected, codexw walks a **recency-biased predecessor chain**: + +``` +gpt-5.3-codex → gpt-5.2-codex → gpt-5.1-codex → gpt-5-codex → gpt-4.2-codex + ↑ ↑ ↑ ↑ ↑ + same-major predecessors │ prior-major probes + └── (limited to 5 models total) +``` + +**Policy rationale:** + +- Recent models are more likely to be available +- Avoids drifting into obsolete model tails +- 5-model window prevents runaway fallback + +### Reasoning Effort Fallback + +When `reasoning_effort_unsupported()` is detected, codexw downgrades effort: + +``` +xhigh → high → medium → low → minimal +``` + +**Detection signals:** + +- Structured JSON error with `param: "reasoning.effort"` +- Error message mentioning `model_reasoning_effort` +- "unsupported", "not supported", "invalid value" keywords + +### State Persistence + +Once a 
working model+effort pair is found, `ModelFallbackState` preserves it for subsequent passes in the same run. This avoids repeated fallback overhead. + +## Pass Pipeline + +Default pipeline runs 4 pass categories: + +| Category | Purpose | +| ------------------- | -------------------------------------------- | +| **Policy** | Enforce all discovered rule files | +| **Core** (4 passes) | Breadth → Regressions → Architecture → Tests | +| **Domain** | Per-domain focused review | +| **Depth** | Hotspot files (top N by churn) | + +Each pass type can be toggled via `pipeline.include_*` flags. + +## Profile Sync + +On each run (unless `--no-sync-profile`), codexw: + +1. Infers signals from repository (rules, domains, prompts) +2. Merges with existing profile, preserving manual edits +3. Prunes stale auto-managed entries +4. Writes updated profile back + +Auto-managed entries are tracked in `profile_meta.autogen` to distinguish them from manual edits. + +## Extension Points + +### Adding a New Pass Type + +1. Add toggle in `constants.py` (e.g., `include_security_pass`) +2. Extend `PassBuilder.build_passes()` to construct new `PassSpec` +3. No changes needed in `PassRunner` — it executes any `PassSpec` + +### Adding a New Fallback Strategy + +1. Add detection method to `RetryStrategy` +2. Extend `run_review_pass_with_fallback()` to check and handle it +3. Add corresponding test case + +### Adding a New Profile Field + +1. Add default in `constants.py` +2. Handle in `normalize_profile()` (profile.py) +3. Handle in `sync_profile_with_repo()` if auto-synced +4. 
Update example profiles + +## Testing + +Primary test file: `test/codexw_test.py` + +| Category | Coverage | +| -------------------- | -------------------------------------------- | +| YAML parsing | Flow lists, comments, nulls, quotes | +| Profile bootstrap | Domain inference, generic prompts | +| Model fallback | Chain construction, retry behavior | +| Effort fallback | Detection, downgrade sequence | +| State persistence | Cross-pass reuse | +| Error classification | Structured/unstructured retry signal parsing | + +Canonical local verification: + +```bash +# Targeted codexw unit coverage +python3 test/codexw_test.py -q + +# Python syntax sanity for codexw modules +python3 -m py_compile codexw/*.py + +# Full repository validation (includes sync-ai-rules + formatting checks) +make test +``` + +Optional equivalent unit command: + +```bash +python3 -m pytest test/codexw_test.py -v +``` + +## Dependencies + +**Required:** + +- Python 3.9+ +- `codex` CLI in PATH + +**Optional:** + +- `PyYAML` (for full YAML support; fallback parser handles common cases) diff --git a/codexw/cli.py b/codexw/cli.py new file mode 100644 index 0000000..8396cca --- /dev/null +++ b/codexw/cli.py @@ -0,0 +1,87 @@ +"""CLI argument parsing for codexw. + +This module handles command-line argument parsing. +Keeps the main entry point clean. 
+""" + +from __future__ import annotations + +import argparse + + +def build_parser() -> argparse.ArgumentParser: + """Build the argument parser for codexw.""" + parser = argparse.ArgumentParser( + prog="codexw", + description="Generic, profile-aware Codex wrapper for local PR-grade review.", + ) + sub = parser.add_subparsers(dest="command") + + review = sub.add_parser( + "review", + help="Run profile-driven PR-grade multi-pass review.", + ) + review_pr = sub.add_parser( + "review-pr", + help="Alias for 'review' (kept for backward compatibility).", + ) + + def add_review_args(target_parser: argparse.ArgumentParser) -> None: + target_parser.add_argument( + "--profile", help="Path to local-review-profile.yaml", default=None + ) + mode = target_parser.add_mutually_exclusive_group() + mode.add_argument("--base", help="Base branch", default=None) + mode.add_argument("--uncommitted", action="store_true", help="Review uncommitted changes") + mode.add_argument("--commit", help="Review a specific commit SHA", default=None) + target_parser.add_argument("--domains", help="Comma-separated domain list", default=None) + target_parser.add_argument( + "--depth-hotspots", type=int, help="Number of hotspot depth passes" + ) + target_parser.add_argument("--title", help="Optional review title", default=None) + target_parser.add_argument( + "--output-dir", help="Output directory for artifacts", default=None + ) + target_parser.add_argument("--model", help="Optional model override", default=None) + target_parser.add_argument( + "--print-effective-profile", + action="store_true", + help="Print normalized profile and exit (no review execution)", + ) + target_parser.add_argument( + "--bootstrap-only", + action="store_true", + help="Create missing profile (if needed) and exit", + ) + target_parser.add_argument( + "--sync-profile-only", + action="store_true", + help="Sync profile from repository signals and exit", + ) + target_parser.add_argument( + "--no-bootstrap-profile", + 
action="store_true", + help="Disable automatic profile generation when missing", + ) + target_parser.add_argument( + "--no-sync-profile", + action="store_true", + help="Disable automatic profile sync from repository signals", + ) + target_parser.add_argument( + "--no-prune-autogen", + action="store_true", + help="Keep stale auto-managed profile entries for this run", + ) + gate_mode = target_parser.add_mutually_exclusive_group() + gate_mode.add_argument("--fail-on-findings", action="store_true", help="Force strict gate") + gate_mode.add_argument( + "--no-fail-on-findings", + action="store_true", + help="Exploratory mode; do not fail when findings exist", + ) + + add_review_args(review) + add_review_args(review_pr) + + return parser diff --git a/codexw/codexw-features-and-usecases.md b/codexw/codexw-features-and-usecases.md deleted file mode 100644 index baaf3eb..0000000 --- a/codexw/codexw-features-and-usecases.md +++ /dev/null @@ -1,97 +0,0 @@ -# Codexw Features + Use Cases - -## 1) Quick local review hook (`codex-review`) -`codex-review` runs plain `codex review` from pre-commit manual stage. -It gives a fast, low-friction local review path. -Use this for quick sanity checks before push. -Why this matters: fast feedback without waiting for PR-grade orchestration. - -## 2) PR-grade local review hook (`codex-review-pr-grade`) -`codex-review-pr-grade` runs `./codexw/__main__.py review`. -It executes profile-driven, multi-pass review instead of one generic pass. -Use this before opening or updating a PR. -Why this matters: deeper, more consistent local review quality. - -## 3) Rule pattern validation at startup -`codexw` reads `rules.include` from `local-review-profile.yaml` (for example `AGENTS.md`, `*.mdc`). -At startup, it resolves each pattern and checks if real files exist. -If a pattern/file does not resolve, it prints a warning and removes that pattern from effective enforcement for that run. 
-Why this matters: avoids silent “rules are enforced” assumptions when paths are stale or misconfigured. - -## 4) Fallback YAML parser/writer (no `PyYAML` required) -`codexw` can read/write profile YAML even when `PyYAML` is not installed. -This keeps profile bootstrap, sync, and review runnable across varied machines and CI images. -Use this when environment dependencies are minimal or inconsistent. -Why this matters: review workflow stays operational without extra setup. - -## 5) Hardened fallback parsing semantics -Fallback parsing supports flow lists, inline comments, null values, quoted scalars, and escape handling. -It is designed to preserve effective config correctness in real-world YAML formatting. -Use this when profiles include compact YAML forms and comments. -Why this matters: prevents silent config drift that can change domains, gating, or scope. - -## 6) Target scope control (`--base`, `--uncommitted`, `--commit`) -`codexw` can review a branch diff, local dirty state, or a specific commit. -This limits analysis to the intended change window. -Use `--uncommitted` during iteration and `--base` for pre-merge validation. -Why this matters: less noise, more relevant findings. - -## 7) Profile bootstrap and sync (`--bootstrap-only`, `--sync-profile-only`) -If profile is missing, bootstrap creates it from repository signals. -Sync refreshes auto-managed parts (rules/domains/prompts) while preserving manual edits. -Use this during onboarding and rule evolution. -Why this matters: less manual maintenance and better policy consistency. - -## 8) Sync controls (`--no-sync-profile`, `--no-prune-autogen`) -These flags let teams freeze profile behavior for a run. -`--no-sync-profile` skips sync; `--no-prune-autogen` keeps stale auto-managed entries. -Use this for debugging or controlled rollout. -Why this matters: safer troubleshooting when behavior changes are under investigation. 
- -## 9) Domain-focused review (`--domains core,testing`) -`--domains` filters which domain passes run and which domain prompts are applied. -Backend execution is still `codex review`, but wrapper orchestration changes what is asked and how passes are executed. -Use this for targeted work (for example testing-heavy changes). -Why this matters: concentrates runtime budget on highest-value domains. - -## 10) Wrapper enhancement over plain `codex review` -`codexw` adds orchestration around `codex review`: pass planning, prompt composition, rule context injection, parsing, reporting, and gating. -So one backend engine becomes a structured local review pipeline. -Use this when chat-style one-pass review is not enough. -Why this matters: improves repeatability and depth. - -## 11) Multi-pass pipeline (policy/core/domain/depth) -`codexw` runs specialized passes instead of a single flat prompt. -Each pass targets different risk classes and coverage goals. -Use this on complex or high-impact diffs. -Why this matters: better recall and fewer blind spots. - -## 12) Hotspot depth analysis (`--depth-hotspots`) -Hotspots are inferred from changed-line churn and reviewed with extra depth passes. -This prioritizes files with higher defect likelihood. -Use this for large diffs with uneven risk distribution. -Why this matters: deeper scrutiny where it most likely pays off. - -## 13) Gating modes (`--fail-on-findings`, `--no-fail-on-findings`) -`codexw` can fail non-zero on findings or run in advisory mode. -This supports both strict gate and exploratory feedback workflows. -Use fail mode for merge readiness and advisory mode during early iteration. -Why this matters: one tool fits multiple workflow stages. - -## 14) Effective profile inspection (`--print-effective-profile`) -This prints normalized runtime profile after loading/sync/validation and exits. -No review passes are executed. -Use this to verify domains, base branch, gating settings, and resolved rule patterns. 
-Why this matters: configuration behavior is inspectable before full execution. - -## 15) Structured review artifacts -Outputs include per-pass markdown, combined report, findings JSON, changed files/modules, hotspots, and enforced rule inventory. -These artifacts support debugging, review handoff, and automation. -Use this when teams need both human-readable and machine-readable outputs. -Why this matters: easier triage, auditing, and tooling integration. - -## 16) Compatibility retry for CLI prompt/target constraints -If a Codex CLI variant rejects prompt+target combinations, `codexw` retries with a compatible path. -This avoids hard failures due to client capability differences. -Use this across mixed developer environments. -Why this matters: more reliable local execution across CLI versions. diff --git a/codexw/constants.py b/codexw/constants.py new file mode 100644 index 0000000..c4539c8 --- /dev/null +++ b/codexw/constants.py @@ -0,0 +1,128 @@ +"""Constants and default configurations for codexw. + +This module contains all magic numbers, default pass specifications, and +sentinel values used throughout the codexw package. Centralizing these +makes the codebase easier to understand and maintain. +""" + +from __future__ import annotations + +# Sentinel string that Codex outputs when no actionable findings exist. +# Used for pass success/failure detection. +NO_FINDINGS_SENTINEL = "No actionable findings." + +# Default global prompt injected into all review passes. +# Provides baseline review guidance without repo-specific context. +DEFAULT_GLOBAL_PROMPT = ( + "Use repository standards for lifecycle, state, architecture boundaries, and " + "production-safety. Prioritize behavior-changing issues and policy violations " + "over style-only comments." +) + +# Policy pass instructions template. +# The policy pass enforces all discovered rule files and outputs coverage. 
+DEFAULT_POLICY_PASS_INSTRUCTIONS = ( + "Task:\n" + "- Enforce every standard file listed above.\n" + "- Output a 'Rule Coverage' section with one line per rule file:\n" + " :: Covered | NotApplicable :: short reason\n" + "- Then output actionable findings using the required schema.\n" + f"- If no actionable findings exist, include exactly this line: {NO_FINDINGS_SENTINEL}" +) + +# Core passes run for the "core" domain. Each pass focuses on a different +# risk class to improve recall and reduce blind spots. +# +# Rationale for 4 passes: +# 1. core-breadth: Ensures every changed file is touched at least once. +# Catches obvious issues across the entire diff surface. +# 2. core-regressions: Focuses on behavioral changes, crashes, and security. +# These are the highest-impact bugs that escape code review. +# 3. core-architecture: Focuses on structural issues - boundaries, concurrency, +# lifecycle. Harder to spot but cause long-term maintenance pain. +# 4. core-tests: Focuses on test coverage gaps. Missing tests allow future +# regressions to slip through. 
+DEFAULT_CORE_PASS_SPECS: list[dict[str, str]] = [ + { + "id": "core-breadth", + "name": "Core 1: breadth coverage across all changed files", + "instructions": ( + "Task:\n" + "- Perform full-breadth review across every changed file listed above.\n" + "- Output a 'Breadth Coverage' section with one line per changed file:\n" + " :: Reviewed | NotApplicable :: short reason\n" + "- Then output actionable findings using the required schema.\n" + f"- If no actionable findings exist, include exactly this line: {NO_FINDINGS_SENTINEL}" + ), + }, + { + "id": "core-regressions", + "name": "Core 2: regressions/security/crash scan", + "instructions": ( + "Focus areas:\n" + "- behavioral regressions\n" + "- crash/nullability risks\n" + "- state corruption and data-loss risks\n" + "- security and privacy issues" + ), + }, + { + "id": "core-architecture", + "name": "Core 3: architecture/concurrency scan", + "instructions": ( + "Focus areas:\n" + "- architecture boundaries and dependency misuse\n" + "- lifecycle and concurrency/threading issues\n" + "- error-handling/fallback correctness\n" + "- protocol/contract boundary failures" + ), + }, + { + "id": "core-tests", + "name": "Core 4: test-coverage scan", + "instructions": ( + "Focus areas:\n" + "- missing tests required to protect the change\n" + "- high-risk edge cases without coverage\n" + "- regressions likely to escape without tests" + ), + }, +] + +# Depth pass instructions template. +# {hotspot} is replaced with the actual file path at runtime. +DEFAULT_DEPTH_PASS_INSTRUCTIONS = ( + "Task:\n" + "- Perform depth-first review of hotspot file: {hotspot}\n" + "- Traverse directly related changed call paths\n" + "- Prioritize subtle behavioral, concurrency, state, and boundary-condition failures\n" + "- Output only actionable findings with required schema\n" + f"- If no actionable findings exist, include exactly this line: {NO_FINDINGS_SENTINEL}" +) + +# Default review configuration values. 
+DEFAULT_BASE_BRANCH = "main" +DEFAULT_DEPTH_HOTSPOTS = 3 +DEFAULT_OUTPUT_ROOT = ".codex/review-runs" +DEFAULT_STRICT_GATE = True + +# Default rule patterns to search for when no profile exists. +DEFAULT_RULE_PATTERNS = ("AGENTS.md", ".cursor/rules/**/*.mdc") + +# Model fallback and compatibility handling constants. +# These are used by pass execution retry logic. +REASONING_EFFORT_ORDER = ("xhigh", "high", "medium", "low", "minimal") +DEFAULT_MODEL_FALLBACK_WINDOW = 5 +PREVIOUS_MAJOR_MINOR_CANDIDATES = (2, 1) +REASONING_PARAM_HINTS = {"model_reasoning_effort", "reasoning.effort", "reasoning_effort"} +MODEL_UNAVAILABLE_CODE_HINTS = { + "model_not_found", + "unknown_model", + "model_unavailable", + "unsupported_model", + "model_not_supported", +} +COMPAT_CODE_HINTS = { + "unsupported_option_combination", + "unsupported_argument_combination", +} diff --git a/codexw/features-and-usecases.md b/codexw/features-and-usecases.md new file mode 100644 index 0000000..b309f99 --- /dev/null +++ b/codexw/features-and-usecases.md @@ -0,0 +1,89 @@ +# Codexw Features + Use Cases + +This document describes the local review capabilities provided by `codexw` and associated pre-commit hooks. + +## Review Paths + +1. **Quick review (`codex-review`)** + Runs plain `codex review` from manual pre-commit stage for fast local sanity checks before push. + Why it matters: lowest-latency feedback path. + +2. **PR-grade review (`codex-review-pr-grade`)** + Runs `./codexw/__main__.py review` with profile-driven multi-pass orchestration. + Why it matters: deeper and more consistent review than one-shot prompts. + +## Profile + Policy Controls + +3. **Rule pattern validation at startup** + Resolves `rules.include` entries and warns on missing/unmatched patterns; invalid patterns are dropped for that run. + Why it matters: prevents false assumptions that stale rules are being enforced. + +4. 
**Profile bootstrap + sync** (`--bootstrap-only`, `--sync-profile-only`) + Auto-generates missing profile and syncs repository-derived signals (rules/domains/prompts) while preserving manual edits. + Why it matters: reduces manual profile drift across repos. + +5. **Sync safety toggles** (`--no-sync-profile`, `--no-prune-autogen`) + Allows freezing sync behavior for debugging or controlled rollouts. + Why it matters: makes behavior changes diagnosable and reversible. + +6. **Effective profile inspection** (`--print-effective-profile`) + Prints normalized runtime profile and exits without running review passes. + Why it matters: configuration is inspectable before expensive execution. + +## Execution Scope + Quality Depth + +7. **Target scope control** (`--base`, `--uncommitted`, `--commit`) + Limits review to the intended change window: branch diff, dirty state, or specific commit. + Why it matters: less noise, higher relevance. + +8. **Domain-focused passes** (`--domains core,testing`) + Runs only selected domain passes and domain prompts for targeted work. + Why it matters: runtime budget is focused on high-value domains. + +9. **Multi-pass pipeline** (policy/core/domain/depth) + Executes specialized passes instead of one flat prompt to improve breadth + depth. + Why it matters: better recall across different risk classes. + +10. **Hotspot depth analysis** (`--depth-hotspots`) + Uses churn-derived hotspots for extra depth passes on high-risk files. + Why it matters: additional scrutiny where defects are more likely. + +11. **Gating modes** (`--fail-on-findings`, `--no-fail-on-findings`) + Supports strict non-zero gate or advisory mode for earlier iteration. + Why it matters: same tool works across dev and pre-merge stages. + +## Resilience + Portability + +12. **Fallback YAML parser/writer** (no `PyYAML` required) + Profile read/write still works without `PyYAML`; includes coverage for flow lists, comments, nulls, and quoted scalars. 
+ Why it matters: workflow remains portable across minimal environments. + +13. **CLI compatibility retry** + If CLI rejects prompt+target combinations, wrapper retries in a compatible path. + Why it matters: fewer environment-specific hard failures. + +14. **Model fallback (recency-biased)** + If model is unavailable, fallback is attempted within a fixed recent 5-model window from the original requested model (for example `gpt-5.3-codex -> gpt-5.2-codex -> gpt-5.1-codex -> gpt-5-codex -> gpt-4.2-codex`). + Why it matters: avoids drifting into obsolete model tails. + +15. **Reasoning-effort fallback** + If reasoning effort is unsupported, effort is downgraded (`xhigh -> high -> medium -> low -> minimal`) using structured error signals and tolerant parsing. + Why it matters: resilient behavior across CLI/API message format changes. + +16. **Resolved model/effort reuse within run** + Once a working pair is found, subsequent passes reuse it in the same review run. + Why it matters: reduces repeated failure/retry overhead. + +## Outputs + Integration + +17. **Structured artifacts** + Emits per-pass markdown, combined report, findings JSON, and support files (`changed-files`, `modules`, `hotspots`, `enforced rules`, `pass status`). + Why it matters: supports human triage plus machine automation. + +18. **Wrapper orchestration over plain `codex review`** + Adds pass planning, prompt composition, rule injection, finding parsing, reporting, and gating around the same backend CLI engine. + Why it matters: local review quality approaches dedicated PR-review workflows. + +--- + +For internal architecture details, see [architecture.md](./architecture.md). diff --git a/codexw/finding_parser.py b/codexw/finding_parser.py new file mode 100644 index 0000000..79a371a --- /dev/null +++ b/codexw/finding_parser.py @@ -0,0 +1,119 @@ +"""Finding parser for codexw. + +This module extracts structured findings from Codex review output. +Keeps parsing logic isolated from orchestration. 
+""" + +from __future__ import annotations + +import re +from typing import Any + +from .constants import NO_FINDINGS_SENTINEL + + +def extract_line_number(raw: str) -> int | None: + """Extract line number from a line reference string.""" + match = re.search(r"\d+", raw) + if not match: + return None + try: + number = int(match.group(0)) + except ValueError: + return None + return number if number > 0 else None + + +def normalize_finding_line(raw_line: str) -> str: + """Normalize a finding line by removing markdown formatting.""" + line = raw_line.strip() + if not line: + return "" + + # Remove bullet points and numbering + line = re.sub(r"^[-*+]\s*", "", line) + line = re.sub(r"^\d+[.)]\s*", "", line) + + # Remove bold/code formatting + line = re.sub(r"^\*\*([^*]+)\*\*\s*", r"\1 ", line) + line = re.sub(r"^__([^_]+)__\s*", r"\1 ", line) + line = re.sub(r"^`([^`]+)`\s*", r"\1 ", line) + + # Normalize spacing around colons + line = re.sub(r"\s+:\s*", ": ", line, count=1) + return line + + +def parse_findings_from_pass(text: str, pass_id: str) -> list[dict[str, Any]]: + """Parse structured findings from pass output text.""" + findings: list[dict[str, Any]] = [] + current: dict[str, Any] | None = None + + def flush() -> None: + nonlocal current + if not current: + return + severity = str(current.get("severity", "")).strip().upper() + file_path = str(current.get("file_path", "")).strip() + if severity and file_path: + current["pass_id"] = pass_id + current["line"] = extract_line_number(str(current.get("line_raw", ""))) + findings.append(current) + current = None + + for raw_line in text.splitlines(): + line = normalize_finding_line(raw_line) + if not line: + continue + if NO_FINDINGS_SENTINEL in line: + continue + + # New finding starts with Severity + severity_match = re.match(r"(?i)^severity\s*:\s*(P[0-3])\b", line) + if severity_match: + flush() + current = { + "severity": severity_match.group(1).upper(), + "type": "", + "file_path": "", + "line_raw": "", + "rule": 
"", + "risk": "", + "fix": "", + "title": "", + } + continue + + if not current: + continue + + # Parse finding fields + if re.match(r"(?i)^type\s*:", line): + current["type"] = line.split(":", 1)[1].strip() + elif re.match(r"(?i)^(file\s*path|path|file)\s*:", line): + current["file_path"] = line.split(":", 1)[1].strip() + elif re.match(r"(?i)^(line|line\s*number|precise line number|line range)\s*:", line): + current["line_raw"] = line.split(":", 1)[1].strip() + elif re.match(r"(?i)^violated rule", line): + current["rule"] = line.split(":", 1)[1].strip() + elif re.match(r"(?i)^why this is risky\s*:", line): + current["risk"] = line.split(":", 1)[1].strip() + elif re.match(r"(?i)^minimal fix direction\s*:", line): + current["fix"] = line.split(":", 1)[1].strip() + elif re.match(r"(?i)^title\s*:", line): + current["title"] = line.split(":", 1)[1].strip() + # Continuation of risk description + elif current.get("risk"): + current["risk"] = f"{current['risk']} {line}".strip() + + flush() + return findings + + +def pass_has_no_findings(text: str, parsed_findings: list[dict[str, Any]] | None = None) -> bool: + """Check if pass output indicates no actionable findings.""" + if NO_FINDINGS_SENTINEL not in text: + return False + if parsed_findings is None: + parsed_findings = parse_findings_from_pass(text, "probe") + return len(parsed_findings) == 0 diff --git a/codexw/git.py b/codexw/git.py new file mode 100644 index 0000000..d505a5b --- /dev/null +++ b/codexw/git.py @@ -0,0 +1,147 @@ +"""Git operations for codexw. + +This module encapsulates all git-related functionality, providing a clean +interface for the rest of the codebase. Keeps git commands isolated from +review logic. 
+""" + +from __future__ import annotations + +import subprocess +from pathlib import Path + +from .utils import CodexwError, run_checked + + +def list_untracked_files(repo_root: Path) -> list[str]: + """Return untracked file paths relative to repo root.""" + out = run_checked( + ["git", "ls-files", "--others", "--exclude-standard"], + repo_root, + ) + return sorted({line.strip() for line in out.splitlines() if line.strip()}) + + +def count_file_lines(path: Path) -> int: + """Best-effort line count for a file on disk.""" + try: + with path.open("rb") as fh: + newline_count = 0 + saw_any_bytes = False + ends_with_newline = False + + while True: + chunk = fh.read(64 * 1024) + if not chunk: + break + saw_any_bytes = True + newline_count += chunk.count(b"\n") + ends_with_newline = chunk.endswith(b"\n") + except OSError: + return 0 + if not saw_any_bytes: + return 0 + return newline_count + (0 if ends_with_newline else 1) + + +def find_repo_root(start: Path) -> Path: + """Find the git repository root from a starting path.""" + try: + out = run_checked(["git", "rev-parse", "--show-toplevel"], start).strip() + if out: + return Path(out) + except CodexwError: + pass + return start + + +def git_ref_exists(repo_root: Path, ref: str) -> bool: + """Check if a git ref exists.""" + proc = subprocess.run( + ["git", "show-ref", "--verify", "--quiet", ref], + cwd=str(repo_root), + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + text=True, + check=False, + ) + return proc.returncode == 0 + + +def detect_default_base(repo_root: Path) -> str: + """Detect the default base branch (master or main).""" + # Check local branches first, then remote + candidates = ["master", "main"] + ref_types = ["refs/heads/{}", "refs/remotes/origin/{}"] + + for ref_template in ref_types: + for candidate in candidates: + if git_ref_exists(repo_root, ref_template.format(candidate)): + return candidate + + return "main" + + +def collect_changed_files(repo_root: Path, mode: str, base: str, commit: 
def collect_numstat(repo_root: Path, mode: str, base: str, commit: str) -> list[tuple[int, str]]:
    """Collect per-file change sizes (added + deleted lines) for a review target.

    Args:
        repo_root: Repository root; git runs here and untracked paths are
            resolved against it.
        mode: ``"base"`` (diff of ``base...HEAD``), ``"uncommitted"`` (diff
            against ``HEAD`` plus untracked files) or ``"commit"`` (a single
            commit).
        base: Base revision, used only in ``"base"`` mode.
        commit: Commit to inspect, used only in ``"commit"`` mode.

    Returns:
        ``(changed_line_count, path)`` tuples sorted by count, largest first.
        Renamed or copied files are reported under their new path. Counts
        that are not plain digits are treated as 0.

    Raises:
        CodexwError: If ``mode`` is not one of the supported values.
    """
    # `-z` makes git emit NUL-terminated records with raw paths. Without it a
    # rename is printed as "dir/{old => new}/file", which is not a usable path
    # and does not match what `--name-only` reports for the same change.
    if mode == "base":
        cmd = ["git", "diff", "--numstat", "-z", f"{base}...HEAD"]
    elif mode == "uncommitted":
        cmd = ["git", "diff", "--numstat", "-z", "HEAD"]
    elif mode == "commit":
        cmd = ["git", "show", "--numstat", "-z", "--pretty=", commit]
    else:
        raise CodexwError(f"unsupported mode: {mode}")

    out = run_checked(cmd, repo_root)
    changes_by_path: dict[str, int] = {}
    fields = iter(out.split("\0"))
    for record in fields:
        # Tolerate a stray newline ahead of the first record.
        parts = record.lstrip("\n").split("\t", 2)
        if len(parts) < 3:
            continue
        add_raw, del_raw, path = parts
        if not path:
            # Rename/copy record: the old and new paths follow as their own
            # NUL-terminated fields; attribute the change to the new path.
            next(fields, "")
            path = next(fields, "")
            if not path:
                continue
        add = int(add_raw) if add_raw.isdigit() else 0
        rem = int(del_raw) if del_raw.isdigit() else 0
        changes_by_path[path] = add + rem

    # `git diff --numstat HEAD` excludes untracked files; include them so
    # hotspot depth passes can prioritize newly added files during local review.
    if mode == "uncommitted":
        for rel_path in list_untracked_files(repo_root):
            if rel_path in changes_by_path:
                continue
            changes_by_path[rel_path] = count_file_lines(repo_root / rel_path)

    rows = [(delta, path) for path, delta in changes_by_path.items()]
    rows.sort(key=lambda x: x[0], reverse=True)
    return rows
def extract_error_codes_and_params(output: str) -> tuple[set[str], set[str]]:
    """Pull error ``code`` and ``param`` values out of mixed CLI output.

    The output is lowercased before matching, so every returned token is
    lowercase.

    Args:
        output: Raw text captured from the CLI.

    Returns:
        ``(codes, params)``: the distinct values captured by ``ERROR_CODE_RE``
        and ``ERROR_PARAM_RE`` respectively.
    """
    lowered = output.lower()

    def _captures(pattern: re.Pattern[str]) -> set[str]:
        # Collect the first capture group of every match, normalized.
        found: set[str] = set()
        for hit in pattern.finditer(lowered):
            found.add(hit.group(1).strip().lower())
        return found

    return _captures(ERROR_CODE_RE), _captures(ERROR_PARAM_RE)
@dataclass
class ModelFallbackState:
    """Mutable state shared across passes for model/effort fallback reuse.

    A single instance is passed to every review pass. Once a pass succeeds,
    the model and effort that worked are recorded here so later passes start
    from that combination instead of repeating the fallback search.
    """

    # Normalized model requested by the caller; None when no override was given.
    preferred_model: str | None = None
    # Model used by the most recent successful run; None until a pass succeeds.
    selected_model: str | None = None
    # Reasoning effort used by the most recent successful run (may stay None).
    selected_effort: str | None = None
def build_model_fallback_chain(
    start_model: str,
    *,
    max_models: int = DEFAULT_MODEL_FALLBACK_WINDOW,
) -> list[str]:
    """Build recency-biased predecessor chain for a Codex model.

    Policy:
    - Keep fallback focused on recent models to avoid obsolete tails.
    - Prefer same-major predecessors first.
    - Then probe likely recent variants from prior major(s): .2, .1, base.

    Args:
        start_model: Model name to start from; it is normalized first.
        max_models: Upper bound on the length of the returned chain.

    Returns:
        Model names to try in order, beginning with the normalized start
        model and never longer than ``max_models``. An empty list is returned
        for an empty model name or a non-positive ``max_models``. A name that
        does not match ``gpt-<major>[.<minor>]-codex`` yields a one-element
        list because no predecessor can be derived from it.
    """
    model = normalize_model_name(start_model)
    if not model or max_models <= 0:
        return []

    match = re.fullmatch(r"gpt-(\d+)(?:\.(\d+))?-codex", model)
    if not match:
        return [model]

    major = int(match.group(1))
    minor = int(match.group(2)) if match.group(2) is not None else None

    chain: list[str] = []
    seen: set[str] = set()

    def append_candidate(candidate: str) -> bool:
        """Append an unseen candidate; return True once the chain is full."""
        if candidate in seen:
            return False
        seen.add(candidate)
        chain.append(candidate)
        return len(chain) >= max_models

    # The start model alone may already fill the window (max_models == 1);
    # ignoring this result would let the chain grow to two entries.
    if append_candidate(model):
        return chain

    # Same-major predecessors (e.g. 5.3 -> 5.2 -> 5.1 -> 5)
    if minor is not None:
        for prev_minor in range(minor - 1, 0, -1):
            if append_candidate(f"gpt-{major}.{prev_minor}-codex"):
                return chain
        if append_candidate(f"gpt-{major}-codex"):
            return chain

    # Prior major recency probes (include .2 explicitly, then .1, then base).
    prev_major = major - 1
    while prev_major >= 1 and len(chain) < max_models:
        for prev_minor in PREVIOUS_MAJOR_MINOR_CANDIDATES:
            if append_candidate(f"gpt-{prev_major}.{prev_minor}-codex"):
                return chain
        if append_candidate(f"gpt-{prev_major}-codex"):
            return chain
        prev_major -= 1

    return chain
def next_fallback_model(
    *,
    anchor_model: str,
    effort: str | None,
    tried_attempts: set[tuple[str, str]],
) -> str | None:
    """Pick the first predecessor of ``anchor_model`` not yet tried at ``effort``.

    Args:
        anchor_model: Model whose fallback chain is walked.
        effort: Reasoning effort of the current attempt; ``None`` is keyed as
            an empty string.
        tried_attempts: ``(model, effort)`` pairs that were already attempted.

    Returns:
        The next predecessor model, or ``None`` when the chain has no
        predecessors or all of them were already tried with this effort.
    """
    effort_key = effort or ""
    # Element 0 of the chain is the anchor itself; only later entries are
    # predecessors worth retrying.
    predecessors = build_model_fallback_chain(anchor_model)[1:]
    untried = (
        candidate
        for candidate in predecessors
        if (candidate, effort_key) not in tried_attempts
    )
    return next(untried, None)
+ while queue: + model, effort = queue.popleft() + model = normalize_model_name(model) + effort = effort.strip().lower() if isinstance(effort, str) and effort.strip() else None + + key = (model or "", effort or "") + if key in attempted: + continue + attempted.add(key) + + cmd = build_review_cmd( + target_args=target_args, + prompt=prompt, + model=model, + effort=effort, + ) + exit_code = run_captured(cmd, repo_root, out_file, stream_output=True) + last_output = out_file.read_text(encoding="utf-8", errors="replace") + last_exit = exit_code + + if exit_code == 0: + model_state.selected_model = model + model_state.selected_effort = effort + return 0, last_output + + if allow_compat_short_circuit and RetryStrategy.should_retry_with_compat(last_output): + return exit_code, last_output + + if RetryStrategy.model_unavailable(last_output): + if not fallback_anchor_model: + fallback_anchor_model = ( + model + or extract_model_from_output(last_output) + or normalize_model_name(model_state.preferred_model) + ) + if fallback_anchor_model: + next_model = next_fallback_model( + anchor_model=fallback_anchor_model, + effort=effort, + tried_attempts=attempted, + ) + if next_model: + print( + f"warning: model '{model or fallback_anchor_model}' unavailable; retrying pass " + f"'{pass_spec.name}' with predecessor model '{next_model}'.", + file=sys.stderr, + ) + queue.appendleft((next_model, effort)) + continue + + if RetryStrategy.reasoning_effort_unsupported(last_output): + supported_effort = extract_supported_effort_from_output(last_output) + if supported_effort and (model or "", supported_effort) not in attempted: + print( + f"warning: model_reasoning_effort unsupported; retrying pass " + f"'{pass_spec.name}' with '{supported_effort}'.", + file=sys.stderr, + ) + queue.appendleft((model, supported_effort)) + continue + + configured_effort = effort or extract_configured_effort_from_output(last_output) + lower_effort = next_lower_effort(configured_effort) + if lower_effort and (model 
def run_review_pass_with_compat(
    repo_root: Path,
    out_file: Path,
    target_args: list[str],
    target_desc: str,
    pass_spec: PassSpec,
    model_state: ModelFallbackState,
) -> None:
    """Run one review pass, retrying in prompt-only mode when required.

    The pass first runs with the target flags. If that fails because the CLI
    rejects a prompt combined with target flags, it runs once more without
    the flags and with the target description prepended to the prompt.

    Args:
        repo_root: Working directory for the CLI.
        out_file: File that receives the captured pass output.
        target_args: Target-selection flags for ``codex review``.
        target_desc: Human-readable description of the target, used in the
            prompt-only retry.
        pass_spec: The pass to execute.
        model_state: Shared model/effort fallback state.

    Raises:
        CodexwError: If the pass still fails after any applicable retry.
    """
    common = {
        "repo_root": repo_root,
        "out_file": out_file,
        "pass_spec": pass_spec,
        "model_state": model_state,
    }

    exit_code, first_output = run_review_pass_with_fallback(
        target_args=target_args,
        prompt=pass_spec.prompt,
        allow_compat_short_circuit=True,
        **common,
    )
    if exit_code == 0:
        return

    # The prompt-only retry only makes sense when there were target flags to drop.
    if RetryStrategy.should_retry_with_compat(first_output) and target_args:
        warning = (
            f"warning: codex CLI rejected prompt+target flags; "
            f"retrying pass '{pass_spec.name}' in prompt-only compatibility mode."
        )
        print(warning, file=sys.stderr)

        compat_prefix = (
            "Target selection requested for this pass:\n"
            f"- {target_desc}\n"
            "Apply review findings to the requested target using the repository context below."
        )
        exit_code, _ = run_review_pass_with_fallback(
            target_args=[],
            prompt=f"{compat_prefix}\n\n{pass_spec.prompt}",
            allow_compat_short_circuit=False,
            **common,
        )
        if exit_code == 0:
            return

    raise CodexwError(
        f"codex review failed in pass '{pass_spec.name}' with exit code {exit_code}. "
        f"See {out_file} for details."
    )
prompt=self._build_prompt(instructions), + ) + ) + + # Domain passes + if pipeline.get("include_domain_passes", True): + for domain in self.selected_domains: + if domain == "core": + continue + pass_counter += 1 + slug = sanitize_pass_id(domain) + passes.append( + PassSpec( + id=f"pass-{pass_counter}-domain-{slug}", + name=f"Domain: {domain}", + prompt=self._build_prompt(build_domain_prompt(domain, self.profile)), + ) + ) + + # Depth passes + if pipeline.get("include_depth_passes", True): + depth_template = str( + pipeline.get("depth_instructions", DEFAULT_DEPTH_PASS_INSTRUCTIONS) + ) + for hotspot in self.hotspots: + pass_counter += 1 + hotspot_slug = sanitize_pass_id(hotspot.replace("/", "_")) + try: + depth_instructions = depth_template.format(hotspot=hotspot) + except Exception: + depth_instructions = DEFAULT_DEPTH_PASS_INSTRUCTIONS.format(hotspot=hotspot) + passes.append( + PassSpec( + id=f"pass-{pass_counter}-depth-{hotspot_slug}", + name=f"Depth hotspot: {hotspot}", + prompt=self._build_prompt(depth_instructions), + ) + ) + + return passes + + +class PassRunner: + """Executes review passes and collects results.""" + + def __init__( + self, + repo_root: Path, + output_root: Path, + target_args: list[str], + target_desc: str, + model_override: str | None = None, + ) -> None: + self.repo_root = repo_root + self.output_root = output_root + self.target_args = target_args + self.target_desc = target_desc + self.model_state = ModelFallbackState( + preferred_model=normalize_model_name(model_override), + ) + + def run_all(self, passes: list[PassSpec]) -> tuple[list[str], list[dict[str, Any]]]: + """Run all passes, return (summary_lines, raw_findings).""" + summary_lines: list[str] = [] + raw_findings: list[dict[str, Any]] = [] + + for index, pass_spec in enumerate(passes, start=1): + out_file = self.output_root / f"{pass_spec.id}.md" + print(f"\n==> ({index}/{len(passes)}) {pass_spec.name}") + + run_review_pass_with_compat( + repo_root=self.repo_root, + 
out_file=out_file, + target_args=self.target_args, + target_desc=self.target_desc, + pass_spec=pass_spec, + model_state=self.model_state, + ) + + text = out_file.read_text(encoding="utf-8", errors="replace") + parsed = parse_findings_from_pass(text, pass_spec.id) + no_findings = pass_has_no_findings(text, parsed) + + # Handle unparsed findings + if not no_findings and not parsed: + parsed = [ + { + "severity": "P2", + "type": "UnparsedFinding", + "file_path": "(unparsed-output)", + "line_raw": "", + "line": None, + "rule": "", + "risk": "Pass output contained findings but did not match structured schema.", + "fix": "Ensure findings follow the required schema.", + "title": pass_spec.name, + "pass_id": pass_spec.id, + } + ] + + if no_findings: + summary_lines.append(f"- [PASS] {pass_spec.name}") + else: + summary_lines.append(f"- [FINDINGS] {pass_spec.name}") + raw_findings.extend(parsed) + + return summary_lines, raw_findings diff --git a/codexw/profile.py b/codexw/profile.py new file mode 100644 index 0000000..723391b --- /dev/null +++ b/codexw/profile.py @@ -0,0 +1,518 @@ +"""Profile management for codexw. + +This module handles loading, normalizing, syncing, and writing +review profile files. Profiles define repository-specific review +configuration. 
def load_profile(path: Path) -> dict[str, Any]:
    """Load a review profile from a YAML or JSON file.

    PyYAML is used when it can be imported. Otherwise the text is parsed as
    JSON first and, if that fails, with the package's ``parse_simple_yaml``
    fallback.

    Args:
        path: Profile file to read (decoded as UTF-8).

    Returns:
        The parsed top-level mapping.

    Raises:
        CodexwError: If the text cannot be parsed, or if the top-level value
            is not a mapping.
    """
    text = path.read_text(encoding="utf-8")

    try:
        import yaml
    except ImportError:
        yaml = None

    if yaml is not None:
        # Only the parse call is guarded: the mapping check below must not be
        # caught here and re-labelled as "invalid YAML".
        try:
            data = yaml.safe_load(text)
        except Exception as exc:
            raise CodexwError(f"invalid YAML in {path}: {exc}") from exc
    else:
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            from .yaml_fallback import parse_simple_yaml

            try:
                data = parse_simple_yaml(text)
            except ValueError as exc:
                raise CodexwError(
                    "PyYAML not available and profile parsing failed. " f"Details: {exc}"
                ) from exc

    if not isinstance(data, dict):
        raise CodexwError(f"profile at {path} must be a mapping/object")
    return data
"id": pass_id, + "name": pass_name, + "instructions": instructions, + } + ) + + if not pipeline_core_passes: + pipeline_core_passes = json.loads(json.dumps(DEFAULT_CORE_PASS_SPECS)) + + return { + "version": str(raw.get("version", "1")), + "repo_name": to_nonempty_string(repo.get("name"), "Repository"), + "default_base": to_nonempty_string(review.get("default_base"), DEFAULT_BASE_BRANCH), + "strict_gate": to_bool(review.get("strict_gate"), DEFAULT_STRICT_GATE), + "depth_hotspots": to_int(review.get("depth_hotspots"), DEFAULT_DEPTH_HOTSPOTS), + "output_root": to_nonempty_string(review.get("output_root"), DEFAULT_OUTPUT_ROOT), + "rule_patterns": to_string_list(rules.get("include"), DEFAULT_RULE_PATTERNS), + "default_domains": default_domains, + "allowed_domains": allowed_domains, + "global_prompt": str(prompts.get("global", "")).strip(), + "domain_prompts": { + str(k): str(v).strip() for k, v in domain_prompt_map.items() if str(v).strip() + }, + "pipeline": { + "include_policy_pass": to_bool(pipeline.get("include_policy_pass"), True), + "include_core_passes": to_bool(pipeline.get("include_core_passes"), True), + "include_domain_passes": to_bool(pipeline.get("include_domain_passes"), True), + "include_depth_passes": to_bool(pipeline.get("include_depth_passes"), True), + "policy_instructions": str( + pipeline.get("policy_instructions", DEFAULT_POLICY_PASS_INSTRUCTIONS) + ).strip() + or DEFAULT_POLICY_PASS_INSTRUCTIONS, + "core_passes": pipeline_core_passes, + "depth_instructions": str( + pipeline.get("depth_instructions", DEFAULT_DEPTH_PASS_INSTRUCTIONS) + ).strip() + or DEFAULT_DEPTH_PASS_INSTRUCTIONS, + }, + } + + +def infer_repo_name(repo_root: Path) -> str: + """Infer repository name from directory name.""" + raw = repo_root.name.strip() + if not raw: + return "Repository" + + tokens = [t for t in re.split(r"[-_]+", raw) if t] + if not tokens: + return raw + + special = { + "ios": "iOS", + "android": "Android", + "api": "API", + "sdk": "SDK", + "ml": "ML", + "ai": 
def discover_rule_files(repo_root: Path, patterns: Sequence[str]) -> list[str]:
    """Discover rule files under ``repo_root`` that match any glob pattern.

    Args:
        repo_root: Directory the patterns are resolved against.
        patterns: Glob patterns relative to ``repo_root``; ``**`` matches
            recursively.

    Returns:
        Sorted, de-duplicated paths of matching regular files, relative to
        ``repo_root``. Directories and paths that are not located under
        ``repo_root`` are dropped.
    """
    # Escape glob metacharacters in the root so that only `pattern` is treated
    # as a glob; a checkout under e.g. "repo[1]" would otherwise match nothing.
    escaped_root = Path(glob.escape(str(repo_root)))

    matches: set[str] = set()
    for pattern in patterns:
        for abs_path in glob.glob(str(escaped_root / pattern), recursive=True):
            candidate = Path(abs_path)
            if not candidate.is_file():
                continue
            try:
                rel = candidate.relative_to(repo_root)
            except ValueError:
                # An absolute pattern can match files outside the repository.
                continue
            matches.add(str(rel))
    return sorted(matches)
def parse_frontmatter(path: Path) -> dict[str, Any]:
    """Parse the leading ``---`` delimited YAML frontmatter of a file.

    Args:
        path: File to inspect.

    Returns:
        The frontmatter mapping. An empty dict is returned when the file
        cannot be read, has no complete frontmatter block at the very start,
        the block fails to parse, or it parses to something other than a
        mapping.
    """
    try:
        # "utf-8-sig" drops a leading byte-order mark, which would otherwise
        # hide the opening "---" from the check below.
        text = path.read_text(encoding="utf-8-sig", errors="replace")
    except OSError:
        return {}

    if not text.startswith("---"):
        return {}

    match = re.match(r"^---\s*\n(.*?)\n---\s*(?:\n|$)", text, flags=re.DOTALL)
    if not match:
        return {}
    try:
        parsed = try_load_yaml(match.group(1))
    except ValueError:
        # Rule frontmatter should not fail the entire review bootstrap path.
        return {}
    # Callers use `.get(...)` on the result, so a scalar or list frontmatter
    # is treated the same as having no frontmatter at all.
    return parsed if isinstance(parsed, dict) else {}
stable_json(raw_profile) + profile: dict[str, Any] = json.loads(json.dumps(raw_profile)) + inferred = build_bootstrap_profile(repo_root) + + profile_meta = ensure_dict(profile, "profile_meta") + autogen = ensure_dict(profile_meta, "autogen") + prev_autogen_rules = to_string_list(autogen.get("rules_include"), []) + prev_autogen_domains = to_string_list(autogen.get("domains"), []) + prev_prompt_raw = autogen.get("prompt_by_domain") + prev_autogen_prompt_map: dict[str, str] = {} + if isinstance(prev_prompt_raw, dict): + for key, value in prev_prompt_raw.items(): + k = str(key).strip() + if k: + prev_autogen_prompt_map[k] = str(value) + + repo = ensure_dict(profile, "repo") + if not str(repo.get("name", "")).strip(): + repo["name"] = inferred["repo"]["name"] + + review = ensure_dict(profile, "review") + if not str(review.get("default_base", "")).strip(): + review["default_base"] = inferred["review"]["default_base"] + if "strict_gate" not in review: + review["strict_gate"] = True + if "depth_hotspots" not in review: + review["depth_hotspots"] = DEFAULT_DEPTH_HOTSPOTS + if not str(review.get("output_root", "")).strip(): + review["output_root"] = DEFAULT_OUTPUT_ROOT + + rules = ensure_dict(profile, "rules") + existing_patterns = to_string_list(rules.get("include"), []) + inferred_patterns = to_string_list(inferred["rules"]["include"], []) + if prune_autogen and prev_autogen_rules: + existing_patterns = [p for p in existing_patterns if p not in set(prev_autogen_rules)] + rules["include"] = unique(existing_patterns + inferred_patterns) + + domains = ensure_dict(profile, "domains") + existing_allowed = to_string_list(domains.get("allowed"), []) + existing_default = to_string_list(domains.get("default"), []) + inferred_domains = to_string_list(inferred["domains"]["default"], ["core"]) + if prune_autogen and prev_autogen_domains: + prev_domain_set = set(prev_autogen_domains) + existing_allowed = [d for d in existing_allowed if d not in prev_domain_set] + existing_default = [d 
for d in existing_default if d not in prev_domain_set] + + merged_allowed = unique(existing_allowed + inferred_domains) + merged_default = unique(existing_default + inferred_domains) + merged_default = [d for d in merged_default if d in set(merged_allowed)] + if not merged_allowed: + merged_allowed = ["core"] + if not merged_default: + merged_default = ["core"] + domains["allowed"] = merged_allowed + domains["default"] = merged_default + + prompts = ensure_dict(profile, "prompts") + if not str(prompts.get("global", "")).strip(): + prompts["global"] = inferred["prompts"]["global"] + + by_domain = prompts.get("by_domain") + if not isinstance(by_domain, dict): + by_domain = {} + + inferred_by_domain = inferred["prompts"]["by_domain"] + new_autogen_prompt_map = dict(prev_autogen_prompt_map) + for domain in merged_allowed: + if domain not in inferred_by_domain: + continue + inferred_prompt = inferred_by_domain[domain] + existing_prompt = str(by_domain.get(domain, "")).strip() + prev_prompt = str(prev_autogen_prompt_map.get(domain, "")).strip() + if not existing_prompt: + by_domain[domain] = inferred_prompt + elif prev_prompt and existing_prompt == prev_prompt and existing_prompt != inferred_prompt: + by_domain[domain] = inferred_prompt + new_autogen_prompt_map[domain] = inferred_prompt + + if prune_autogen: + for domain in list(by_domain.keys()): + if domain in inferred_by_domain: + continue + prev_prompt = str(prev_autogen_prompt_map.get(domain, "")).strip() + current_prompt = str(by_domain.get(domain, "")).strip() + if prev_prompt and current_prompt == prev_prompt: + del by_domain[domain] + new_autogen_prompt_map.pop(domain, None) + + prompts["by_domain"] = by_domain + + pipeline = ensure_dict(profile, "pipeline") + inferred_pipeline = inferred.get("pipeline") + if isinstance(inferred_pipeline, dict): + for key, value in inferred_pipeline.items(): + if key not in pipeline: + pipeline[key] = value + existing_core_passes = pipeline.get("core_passes") + if not 
isinstance(existing_core_passes, list) or not existing_core_passes: + pipeline["core_passes"] = inferred_pipeline.get("core_passes", []) + + if "version" not in profile: + profile["version"] = 1 + + after_without_meta = stable_json(profile) + changed = before != after_without_meta + + if prune_autogen: + autogen["rules_include"] = inferred_patterns + autogen["domains"] = inferred_domains + autogen["prompt_by_domain"] = { + domain: prompt + for domain, prompt in new_autogen_prompt_map.items() + if domain in inferred_by_domain + } + else: + autogen["rules_include"] = unique(prev_autogen_rules + inferred_patterns) + autogen["domains"] = unique(prev_autogen_domains + inferred_domains) + preserved = dict(prev_autogen_prompt_map) + for domain, prompt in inferred_by_domain.items(): + preserved[domain] = prompt + autogen["prompt_by_domain"] = preserved + + meta = ensure_dict(profile, "profile_meta") + if changed: + meta["managed_by"] = "codexw" + meta["last_synced_utc"] = dt.datetime.now(dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + meta["sync_mode"] = "merge+prune" if prune_autogen else "merge" + + final_changed = before != stable_json(profile) + return profile, final_changed diff --git a/codexw/prompts.py b/codexw/prompts.py new file mode 100644 index 0000000..a27da0c --- /dev/null +++ b/codexw/prompts.py @@ -0,0 +1,89 @@ +"""Prompt building for codexw. + +This module handles construction of review prompts for each pass type. +Centralizes prompt logic to make it easier to understand and modify. 
def build_base_rubric(repo_name: str) -> str:
    """Return the reviewer rubric shared by every review pass.

    The rubric names the repository, fixes the enforcement order, lists the
    fields each finding must carry, and ends with the exact sentinel text the
    model must print when it has nothing to report.
    """
    rubric_lines = [
        f"Act as a strict PR gate reviewer for {repo_name}.",
        "Return only actionable findings.",
        "",
        "Enforcement order:",
        "- AGENTS.md instructions",
        "- Domain-specific internal rule files listed below",
        "- Engineering correctness and risk",
        "",
        "For each finding include:",
        "- Severity: P0, P1, P2, or P3",
        "- Type: Bug | Regression | Security | Concurrency | TestGap | RuleViolation",
        "- File path",
        "- Precise line number or tight line range",
        "- Violated rule and rule file path (when applicable)",
        "- Why this is risky",
        "- Minimal fix direction",
        "",
        "Do not output style-only comments unless they violate a required internal rule.",
        f'If no findings, output exactly: "{NO_FINDINGS_SENTINEL}".',
    ]
    return "\n".join(rubric_lines)


def build_rule_block(rule_files: list[str]) -> str:
    """Return the "required standards files" section of a prompt.

    One bullet is emitted per rule file; an explicit placeholder bullet is
    emitted when no rule files were discovered.
    """
    header = "Required standards files (read and enforce strictly):"
    bullets = [f"- {rule}" for rule in rule_files] or ["- (none discovered)"]
    return "\n".join([header, *bullets])


def build_diff_context(
    changed_files: list[str],
    modules: list[tuple[int, str]],
    hotspots: list[str],
) -> str:
    """Return the change-context section of a prompt.

    Args:
        changed_files: Paths touched by the change under review.
        modules: ``(file_count, module_name)`` pairs.
        hotspots: Paths listed under "Top hotspots".
    """

    def bullet_list(entries: list[str]) -> str:
        # An empty section still gets a visible placeholder bullet.
        return "\n".join(entries) or "- (none)"

    module_section = bullet_list(
        [f"- {name} ({count} files)" for count, name in modules]
    )
    hotspot_section = bullet_list([f"- {path}" for path in hotspots])
    file_section = bullet_list([f"- {path}" for path in changed_files])

    sections = [
        "Change context for breadth/depth coverage:",
        f"- Changed files count: {len(changed_files)}",
        "- Changed modules:",
        module_section,
        "- Top hotspots (by changed lines):",
        hotspot_section,
        "- Changed files:",
        file_section,
    ]
    return "\n".join(sections)
def build_domain_prompt(domain: str, profile: dict[str, Any]) -> str:
    """Return the domain-focused instruction block for one review pass.

    The generic focus text for ``domain`` is always emitted. When the profile
    carries a custom prompt for that domain under ``profile["domain_prompts"]``
    it is appended on its own line.

    A missing or non-mapping ``domain_prompts`` entry, a non-string prompt, or
    a whitespace-only prompt are all treated as "no custom prompt" instead of
    raising or emitting a dangling blank tail.
    """
    domain_prompts = profile.get("domain_prompts")
    raw_custom = domain_prompts.get(domain) if isinstance(domain_prompts, dict) else None
    custom = raw_custom if isinstance(raw_custom, str) and raw_custom.strip() else ""
    base = (
        f"Domain focus: {domain}\n"
        f"- identify domain-specific correctness and policy violations for '{domain}'\n"
        "- prioritize regressions and production-risk behavior in changed code"
    )
    return base + ("\n" + custom if custom else "")


def build_pass_prompt(
    base_rubric: str,
    rules_block: str,
    diff_context: str,
    global_prompt: str,
    extra: str,
) -> str:
    """Compose a complete pass prompt from its components.

    Components are joined with a blank line between them, in the order
    rubric, rules, diff context, profile-global context, pass-specific
    ``extra``. Empty, whitespace-only, or ``None`` components are dropped so
    the prompt never contains stray blank sections.
    """
    parts = [base_rubric, rules_block, diff_context]
    if global_prompt:
        parts.append("Profile global context:\n" + global_prompt)
    parts.append(extra)
    # `p and ...` guards against a None component reaching .strip().
    return "\n\n".join([p for p in parts if p and p.strip()])
def utc_now() -> dt.datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return dt.datetime.now(dt.timezone.utc)


def write_findings_json(
    path: Path,
    target_desc: str,
    raw_findings: list[dict[str, Any]],
) -> None:
    """Write the findings artifact as pretty-printed JSON.

    The document records the generation timestamp (UTC), the review target,
    the number of active findings, and the findings themselves.
    """
    payload = {
        "generated_utc": utc_now().strftime("%Y-%m-%dT%H:%M:%SZ"),
        "target": target_desc,
        "counts": {
            "active": len(raw_findings),
        },
        "active_findings": raw_findings,
    }
    path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")


def write_combined_report(
    path: Path,
    profile: dict[str, Any],
    profile_path: Path,
    repo_root: Path,
    target_desc: str,
    selected_domains: list[str],
    rule_files: list[str],
    changed_files: list[str],
    modules: list[tuple[int, str]],
    hotspots: list[str],
    depth_hotspots: int,
    pass_count: int,
    summary_lines: list[str],
    raw_findings: list[dict[str, Any]],
    findings_json_path: Path,
    output_root: Path,
    title: str | None = None,
    model_override: str | None = None,
) -> None:
    """Write the combined markdown report for a review run.

    The report contains a metadata header, the findings summary, pass status,
    the enforced rule files, changed modules/files, hotspots, and finally the
    text of every ``pass-*.md`` file found in ``output_root`` (sorted by name).
    Every list section prints ``(none)`` when it is empty.
    """
    # Show the profile path relative to the repo when it lives inside it.
    try:
        profile_display = str(profile_path.relative_to(repo_root))
    except ValueError:
        profile_display = str(profile_path)

    with path.open("w", encoding="utf-8") as fh:
        fh.write("# Codex PR-Grade Multi-Pass Review\n\n")
        fh.write(f"- Generated: {utc_now().strftime('%Y-%m-%d %H:%M:%SZ')}\n")
        fh.write(f"- Repository context: {profile['repo_name']}\n")
        fh.write(f"- Target: {target_desc}\n")
        fh.write(f"- Domains: {','.join(selected_domains)}\n")
        fh.write(f"- Auto-enforced rule files: {len(rule_files)}\n")
        fh.write(f"- Changed files: {len(changed_files)}\n")
        fh.write(f"- Depth hotspots: {depth_hotspots}\n")
        if title:
            fh.write(f"- Title: {title}\n")
        if model_override:
            fh.write(f"- Model override: {model_override}\n")
        fh.write(f"- Pass count: {pass_count}\n")
        fh.write(f"- Profile file: {profile_display}\n\n")

        fh.write("## Findings Summary\n\n")
        fh.write(f"- Active findings: {len(raw_findings)}\n")
        fh.write(f"- JSON artifact: {findings_json_path}\n\n")

        fh.write("## Pass Status\n\n")
        fh.write("\n".join(summary_lines) + "\n\n")

        fh.write("## Auto-Enforced Rule Files\n\n")
        if rule_files:
            fh.write("\n".join(rule_files) + "\n\n")
        else:
            fh.write("(none discovered)\n\n")

        fh.write("## Changed Modules\n\n")
        if modules:
            fh.write("\n".join([f"{count}\t{module}" for count, module in modules]) + "\n\n")
        else:
            fh.write("(none)\n\n")

        fh.write("## Changed Files\n\n")
        # Same "(none)" placeholder the sibling sections use, instead of an
        # empty section body.
        fh.write(("\n".join(changed_files) if changed_files else "(none)") + "\n\n")

        fh.write("## Hotspots\n\n")
        fh.write(("\n".join(hotspots) if hotspots else "(none)") + "\n\n")

        # Append per-pass outputs
        for pass_file in sorted(output_root.glob("pass-*.md")):
            fh.write(f"## {pass_file.stem}\n\n")
            pass_text = pass_file.read_text(encoding="utf-8")
            fh.write(pass_text)
            if not pass_text.endswith("\n"):
                fh.write("\n")
            fh.write("\n")


def write_empty_report(
    path: Path,
    profile: dict[str, Any],
    target_desc: str,
    selected_domains: list[str],
) -> None:
    """Write the short report used when the selected target has no changed files."""
    path.write_text(
        "\n".join(
            [
                "# Codex PR-Grade Multi-Pass Review",
                "",
                f"- Generated: {utc_now().strftime('%Y-%m-%d %H:%M:%SZ')}",
                f"- Repository context: {profile['repo_name']}",
                f"- Target: {target_desc}",
                f"- Domains: {','.join(selected_domains)}",
                "- Changed files: 0",
                "",
                "No files detected for selected target.",
            ]
        )
        + "\n",
        encoding="utf-8",
    )


def write_support_files(
    output_root: Path,
    rule_files: list[str],
    changed_files: list[str],
    modules: list[tuple[int, str]],
    hotspots: list[str],
    summary_lines: list[str],
) -> None:
    """Write the plain-text side artifacts of a review run into ``output_root``.

    Each list is written one entry per line; an empty list produces an empty
    file. ``pass-status.md`` is only written when there are summary lines.
    """
    (output_root / "enforced-rule-files.txt").write_text(
        "\n".join(rule_files) + ("\n" if rule_files else ""),
        encoding="utf-8",
    )

    (output_root / "changed-files.txt").write_text(
        "\n".join(changed_files) + ("\n" if changed_files else ""),
        encoding="utf-8",
    )

    (output_root / "changed-modules.txt").write_text(
        "\n".join([f"{count}\t{module}" for count, module in modules]) + ("\n" if modules else ""),
        encoding="utf-8",
    )

    (output_root / "hotspots.txt").write_text(
        "\n".join(hotspots) + ("\n" if hotspots else ""),
        encoding="utf-8",
    )

    if summary_lines:
        (output_root / "pass-status.md").write_text(
            "\n".join(summary_lines) + "\n",
            encoding="utf-8",
        )


class CodexwError(Exception):
    """Base exception for codexw errors.

    ``code`` is stored on the exception alongside the message (default 1).
    """

    def __init__(self, message: str, code: int = 1) -> None:
        super().__init__(message)
        self.code = code


def die(message: str, code: int = 1) -> None:
    """Raise CodexwError with the given message and code.

    This function is kept for backward compatibility with existing code.
    New code should raise CodexwError directly.
    """
    raise CodexwError(message, code)


def run_checked(cmd: list[str], cwd: Path) -> str:
    """Run a command and return its stdout.

    Raises:
        CodexwError: If the command exits non-zero (message carries stderr,
            falling back to stdout) or cannot be started at all, e.g. the
            executable is missing or ``cwd`` does not exist.
    """
    rendered = " ".join(shlex.quote(x) for x in cmd)
    try:
        proc = subprocess.run(
            cmd,
            cwd=str(cwd),
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except subprocess.CalledProcessError as exc:
        stderr = (exc.stderr or "").strip()
        stdout = (exc.stdout or "").strip()
        details = stderr or stdout or "command failed"
        raise CodexwError(f"{rendered} :: {details}") from exc
    except OSError as exc:
        # A command that cannot even be spawned is also a "failure"; report it
        # through the same exception type instead of leaking a raw OSError.
        raise CodexwError(f"{rendered} :: {exc}") from exc
    return proc.stdout


def run_captured(cmd: list[str], cwd: Path, out_file: Path, *, stream_output: bool) -> int:
    """Run a command, write its combined stdout/stderr to ``out_file``, return the exit code.

    When ``stream_output`` is true the captured output is also printed to
    stdout after the command has finished (it is not streamed live).
    """
    proc = subprocess.run(
        cmd,
        cwd=str(cwd),
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        check=False,
    )
    output = proc.stdout or ""
    out_file.write_text(output, encoding="utf-8")
    if stream_output and output:
        print(output, end="")
    return proc.returncode


def shutil_which(name: str) -> str | None:
    """Find executable in PATH (minimal shutil.which replacement).

    Returns the first PATH entry containing an executable regular file called
    ``name``, or ``None``. Directories are skipped even though they usually
    carry the execute bit.
    """
    for directory in os.environ.get("PATH", "").split(os.pathsep):
        candidate = Path(directory) / name
        if candidate.is_file() and os.access(candidate, os.X_OK):
            return str(candidate)
    return None


def to_bool(value: Any, default: bool) -> bool:
    """Convert value to boolean with default.

    Accepts real booleans, the integers 0 and 1, and the strings
    1/true/yes/on and 0/false/no/off (case-insensitive, surrounding whitespace
    ignored). Anything else yields ``default``.
    """
    if value is None:
        return default
    if isinstance(value, bool):
        return value
    if isinstance(value, int):
        # Mirrors the "1"/"0" strings accepted below.
        return bool(value) if value in (0, 1) else default
    if isinstance(value, str):
        norm = value.strip().lower()
        if norm in {"1", "true", "yes", "on"}:
            return True
        if norm in {"0", "false", "no", "off"}:
            return False
    return default


def to_int(value: Any, default: int) -> int:
    """Convert value to non-negative integer with default.

    Values that cannot be converted, and negative results, yield ``default``.
    """
    if value is None:
        return default
    try:
        parsed = int(value)
    except (TypeError, ValueError):
        return default
    return parsed if parsed >= 0 else default


def to_string_list(value: Any, default: Sequence[str] | None = None) -> list[str]:
    """Convert value to list of non-empty strings.

    Lists are stringified element-wise, strings are split on commas; entries
    are stripped and empty ones dropped. ``None`` and any other type yield a
    copy of ``default`` (or an empty list).
    """
    if value is None:
        return list(default or [])
    if isinstance(value, list):
        return [str(x).strip() for x in value if str(x).strip()]
    if isinstance(value, str):
        return [x.strip() for x in value.split(",") if x.strip()]
    return list(default or [])


def to_nonempty_string(value: Any, default: str) -> str:
    """Return the stripped string, or ``default`` for non-strings and blank strings."""
    if isinstance(value, str):
        text = value.strip()
        return text if text else default
    return default


def unique(values: list[str]) -> list[str]:
    """Return the stripped, non-empty values with duplicates removed, preserving order."""
    seen: set[str] = set()
    out: list[str] = []
    for v in values:
        s = str(v).strip()
        if not s or s in seen:
            continue
        seen.add(s)
        out.append(s)
    return out


def ensure_dict(parent: dict[str, Any], key: str) -> dict[str, Any]:
    """Return ``parent[key]`` if it is a dict; otherwise replace it with a new empty dict.

    The returned dict is the object stored in ``parent``, so mutations are
    visible through the parent.
    """
    cur = parent.get(key)
    if isinstance(cur, dict):
        return cur
    parent[key] = {}
    return parent[key]


def sanitize_pass_id(value: str) -> str:
    """Convert string to valid pass ID (alphanumeric, dash, underscore).

    Every other character becomes a dash; leading/trailing dashes are removed
    and an empty result falls back to ``"pass"``.
    """
    return re.sub(r"[^a-zA-Z0-9_-]", "-", value.strip()).strip("-") or "pass"


def stable_json(obj: Any) -> str:
    """Return deterministic JSON string for comparison (sorted keys, compact separators)."""
    return json.dumps(obj, sort_keys=True, separators=(",", ":"))
def try_load_yaml(text: str) -> dict[str, Any]:
    """Try to load YAML text, using PyYAML if available, fallback otherwise.

    Returns the parsed mapping. With PyYAML installed, a non-mapping document
    or any PyYAML error yields ``{}``; without it, the built-in parser is used
    and its ``ValueError`` propagates on malformed input.
    """
    try:
        import yaml  # type: ignore

        data = yaml.safe_load(text)
        return data if isinstance(data, dict) else {}
    except ModuleNotFoundError:
        pass
    except Exception:
        # NOTE(review): PyYAML parse errors are swallowed here while the
        # fallback parser below raises ValueError for malformed input; confirm
        # this asymmetry is intended before changing it.
        return {}

    # Fallback to simple parser
    return parse_simple_yaml(text)


def parse_simple_yaml(text: str) -> dict[str, Any]:
    """Parse a simple YAML document into a dict.

    Raises:
        ValueError: On malformed input or a non-mapping top level.
    """
    return _SimpleYamlParser(text).parse()


# --- Internal implementation ---


def _strip_inline_comment(raw: str) -> str:
    """Remove inline YAML comment from a line.

    A ``#`` starts a comment when it is outside quotes and either follows
    whitespace / starts the line, or directly follows a closed flow
    collection or closed quoted scalar. Trailing whitespace is always removed.
    """
    text = raw.rstrip()
    in_single = False
    in_double = False
    escaped = False
    idx = 0

    while idx < len(text):
        ch = text[idx]
        if in_double:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_double = False
            idx += 1
            continue

        if in_single:
            if ch == "'":
                # '' inside a single-quoted scalar is an escaped quote.
                if idx + 1 < len(text) and text[idx + 1] == "'":
                    idx += 2
                    continue
                in_single = False
            idx += 1
            continue

        if ch == '"':
            in_double = True
        elif ch == "'":
            in_single = True
        elif ch == "#":
            prefix = text[:idx].rstrip()
            if idx == 0 or text[idx - 1].isspace():
                return text[:idx].rstrip()
            # Check if we're after a closed collection or quoted scalar
            if _is_closed_flow(prefix) or _is_closed_quoted(prefix):
                return text[:idx].rstrip()
        idx += 1
    return text


def _is_closed_quoted(text: str) -> bool:
    """Return True if the stripped text both starts and ends with the same quote character."""
    stripped = text.strip()
    if len(stripped) < 2:
        return False

    if stripped[0] == "'" and stripped[-1] == "'":
        return True
    if stripped[0] == '"' and stripped[-1] == '"':
        return True
    return False


def _is_closed_flow(text: str) -> bool:
    """Return True if the stripped text is wrapped in ``[...]`` or ``{...}``."""
    stripped = text.strip()
    if len(stripped) < 2:
        return False
    if stripped[0] == "[" and stripped[-1] == "]":
        return True
    if stripped[0] == "{" and stripped[-1] == "}":
        return True
    return False


def _parse_scalar(raw: str) -> Any:
    """Parse a simple YAML scalar value.

    Recognizes booleans, null, flow lists/maps, integers, simple decimals and
    quoted strings; anything else is returned as the stripped text.
    """
    token = _strip_inline_comment(raw).strip()
    if token == "":
        return ""

    lowered = token.lower()
    if lowered == "true":
        return True
    if lowered == "false":
        return False
    if lowered in {"null", "~"}:
        return None
    if token == "{}":
        return {}
    if token == "[]":
        return []

    # Flow list
    if token.startswith("[") and token.endswith("]"):
        inner = token[1:-1].strip()
        if not inner:
            return []
        return [_parse_scalar(item) for item in _split_flow_items(inner)]

    # Flow map
    if token.startswith("{") and token.endswith("}"):
        inner = token[1:-1].strip()
        if not inner:
            return {}
        out: dict[str, Any] = {}
        for item in _split_flow_items(inner):
            if ":" not in item:
                # Not a key/value pair: give up and keep the raw text.
                return token
            key_raw, value_raw = item.split(":", 1)
            key = _parse_scalar(key_raw)
            out[str(key)] = _parse_scalar(value_raw)
        return out

    # Integer
    if re.fullmatch(r"[+-]?\d+", token):
        try:
            return int(token)
        except ValueError:
            return token

    # Float
    if re.fullmatch(r"[+-]?\d+\.\d+", token):
        try:
            return float(token)
        except ValueError:
            return token

    # Double-quoted string
    if token.startswith('"') and token.endswith('"'):
        try:
            return json.loads(token)
        except json.JSONDecodeError:
            return token[1:-1]

    # Single-quoted string
    if token.startswith("'") and token.endswith("'") and len(token) >= 2:
        return token[1:-1].replace("''", "'")

    return token


def _split_flow_items(raw: str) -> list[str]:
    """Split flow collection items on top-level commas, respecting nesting and quotes."""
    items: list[str] = []
    buf: list[str] = []
    in_single = False
    in_double = False
    escaped = False
    depth = 0

    for ch in raw:
        if in_double:
            buf.append(ch)
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_double = False
            continue

        if in_single:
            buf.append(ch)
            # An escaped '' simply leaves and immediately re-enters the quote.
            if ch == "'":
                in_single = False
            continue

        if ch == '"':
            in_double = True
            buf.append(ch)
            continue
        if ch == "'":
            in_single = True
            buf.append(ch)
            continue

        if ch in "[{(":
            depth += 1
            buf.append(ch)
            continue
        if ch in "]})":
            if depth > 0:
                depth -= 1
            buf.append(ch)
            continue

        if ch == "," and depth == 0:
            items.append("".join(buf).strip())
            buf = []
            continue

        buf.append(ch)

    tail = "".join(buf).strip()
    if tail:
        items.append(tail)
    return items


def _unquote_key(key: str) -> str:
    """Return a mapping key without its surrounding quotes, if it is a quoted scalar."""
    if len(key) >= 2 and key[0] == key[-1] and key[0] in "\"'":
        return str(_parse_scalar(key))
    return key


class _SimpleYamlParser:
    """Simple recursive descent YAML parser.

    Works line by line over block mappings and lists, delegating scalar and
    flow-collection values to ``_parse_scalar``. ``self.index`` is the cursor
    into ``self.lines`` shared by all parse methods.
    """

    def __init__(self, text: str) -> None:
        self.lines = text.splitlines()
        self.index = 0

    @staticmethod
    def _indent(line: str) -> int:
        """Return the number of leading spaces of ``line``."""
        return len(line) - len(line.lstrip(" "))

    @staticmethod
    def _is_ignorable(line: str) -> bool:
        """Return True for blank lines, full-line comments and document markers."""
        stripped = line.strip()
        return not stripped or stripped.startswith("#") or stripped in {"---", "..."}

    def _skip_ignorable(self) -> None:
        """Advance the cursor past ignorable lines."""
        while self.index < len(self.lines) and self._is_ignorable(self.lines[self.index]):
            self.index += 1

    def parse(self) -> dict[str, Any]:
        """Parse the whole document; the top level must be a mapping.

        Raises:
            ValueError: On trailing unparsed content or a non-mapping top level.
        """
        self._skip_ignorable()
        if self.index >= len(self.lines):
            return {}
        start_indent = self._indent(self.lines[self.index])
        value = self._parse_block(start_indent)
        self._skip_ignorable()
        if self.index < len(self.lines):
            raise ValueError(f"unexpected trailing content near line {self.index + 1}")
        if not isinstance(value, dict):
            raise ValueError("top-level YAML must be a mapping")
        return value

    def _parse_block(self, indent: int) -> Any:
        """Parse the list or mapping starting at the cursor."""
        self._skip_ignorable()
        if self.index >= len(self.lines):
            return {}

        cur_indent = self._indent(self.lines[self.index])
        if cur_indent < indent:
            return {}
        indent = max(cur_indent, indent)

        content = self.lines[self.index][indent:]
        if content == "-" or content.startswith("- "):
            return self._parse_list(indent)
        return self._parse_map(indent)

    def _parse_map(self, indent: int) -> dict[str, Any]:
        """Parse consecutive ``key: value`` entries at exactly ``indent``."""
        out: dict[str, Any] = {}
        while True:
            self._skip_ignorable()
            if self.index >= len(self.lines):
                break

            line = self.lines[self.index]
            cur_indent = self._indent(line)
            if cur_indent < indent:
                break
            if cur_indent > indent:
                raise ValueError(f"unexpected indentation at line {self.index + 1}")

            content = line[indent:]
            if content == "-" or content.startswith("- "):
                break
            if ":" not in content:
                raise ValueError(f"invalid mapping entry at line {self.index + 1}")

            key, raw_rest = content.split(":", 1)
            key = key.strip()
            rest = _strip_inline_comment(raw_rest).strip()
            self.index += 1

            if not key:
                raise ValueError(f"empty mapping key at line {self.index}")
            # "key" / 'key' name the same entry as a bare key.
            key = _unquote_key(key)

            if rest in {"|", "|-", ">", ">-"}:
                out[key] = self._parse_block_scalar(cur_indent + 2)
            elif rest == "":
                out[key] = self._parse_nested(cur_indent + 2)
            else:
                out[key] = _parse_scalar(rest)

        return out

    def _parse_nested(self, expected_indent: int) -> Any:
        """Parse the value nested under a key; ``None`` when nothing is indented below it."""
        self._skip_ignorable()
        if self.index >= len(self.lines):
            return None

        cur_indent = self._indent(self.lines[self.index])
        if cur_indent < expected_indent:
            return None
        expected_indent = max(cur_indent, expected_indent)

        content = self.lines[self.index][expected_indent:]
        if content == "-" or content.startswith("- "):
            return self._parse_list(expected_indent)
        return self._parse_map(expected_indent)

    def _parse_list(self, indent: int) -> list[Any]:
        """Parse consecutive ``- item`` entries at exactly ``indent``."""
        out: list[Any] = []
        while True:
            self._skip_ignorable()
            if self.index >= len(self.lines):
                break

            line = self.lines[self.index]
            cur_indent = self._indent(line)
            if cur_indent < indent:
                break
            if cur_indent > indent:
                raise ValueError(f"unexpected indentation at line {self.index + 1}")

            content = line[indent:]
            if not (content == "-" or content.startswith("- ")):
                break

            rest = "" if content == "-" else _strip_inline_comment(content[2:]).strip()
            self.index += 1

            if rest in {"|", "|-", ">", ">-"}:
                out.append(self._parse_block_scalar(indent + 2))
                continue

            if rest == "":
                out.append(self._parse_nested(indent + 2))
                continue

            # Check for inline map in list item (- key: value)
            inline_match = re.match(r"^([A-Za-z0-9_.-]+):(?:\s+|$)(.*)$", rest)
            if inline_match:
                key = inline_match.group(1).strip()
                tail = _strip_inline_comment(inline_match.group(2)).strip()
                item: dict[str, Any] = {}
                if tail in {"|", "|-", ">", ">-"}:
                    item[key] = self._parse_block_scalar(indent + 4)
                elif tail == "":
                    item[key] = self._parse_nested(indent + 4)
                else:
                    item[key] = _parse_scalar(tail)
                # Remaining keys of the same item sit two columns right of the dash.
                for extra_key, extra_val in self._parse_map(indent + 2).items():
                    item[extra_key] = extra_val
                out.append(item)
                continue

            out.append(_parse_scalar(rest))

        return out

    def _parse_block_scalar(self, indent: int) -> str:
        """Parse the body of a block scalar and return it without trailing blank lines.

        ``indent`` is the conventional content column (parent column + 2). The
        real content column is taken from the first non-blank line, which only
        has to be indented deeper than the parent, so blocks written with one
        space or with more than two spaces are read correctly. Folding (``>``)
        is not applied: lines are always joined with newlines.
        """
        min_indent = max(indent - 1, 0)
        block_indent = -1  # unknown until the first non-blank line
        lines: list[str] = []
        while self.index < len(self.lines):
            raw = self.lines[self.index]
            if raw.strip() == "":
                lines.append("")
                self.index += 1
                continue

            cur_indent = self._indent(raw)
            if block_indent < 0:
                if cur_indent < min_indent:
                    break
                block_indent = cur_indent
            elif cur_indent < block_indent:
                break

            lines.append(raw[block_indent:])
            self.index += 1

        while lines and lines[-1] == "":
            lines.pop()
        return "\n".join(lines)
# Plain scalars a YAML 1.1 loader would not read back as the same string.
_YAML_RESERVED_PLAIN = frozenset(
    {"true", "false", "null", "~", "yes", "no", "on", "off", ".inf", "+.inf", ".nan", "=", "<<"}
)

# Anything that starts like a number (ints, floats, 1_000, 0x1F, dates, ...).
_YAML_NUMERIC_PREFIX = re.compile(r"[+-]?\.?\d")


def dump_yaml_text(value: Any) -> str:
    """Dump a value to YAML text.

    Supports dicts, lists and scalars. The result has trailing whitespace
    removed and ends with exactly one newline.
    """
    return "\n".join(_yaml_emit(value)).rstrip() + "\n"


def _yaml_plain_scalar_allowed(value: str) -> bool:
    """Check if value can be written as plain YAML scalar.

    Rejects empty or padded text, text containing YAML indicator characters
    or control whitespace, reserved words, and anything that starts like a
    number (which also covers date-like text such as ``2024-01-01``), so that
    a string is always read back as the same string.
    """
    if not value or value.strip() != value:
        return False
    if any(ch in value for ch in ":#{}[]&,*!?|>'\"%@`"):
        return False
    if value[0] in "-?:!&*@`":
        return False
    if "\n" in value or "\r" in value or "\t" in value:
        return False
    if value.lower() in _YAML_RESERVED_PLAIN:
        return False
    if _YAML_NUMERIC_PREFIX.match(value):
        return False
    return True


def _yaml_inline_scalar(value: Any) -> str:
    """Convert value to inline YAML scalar.

    Strings that are not safe as plain scalars are written as JSON strings,
    i.e. double-quoted with backslash escapes.
    """
    if value is None:
        return "null"
    if isinstance(value, bool):
        return "true" if value else "false"
    if isinstance(value, (int, float)):
        return str(value)
    text = str(value)
    if _yaml_plain_scalar_allowed(text):
        return text
    return json.dumps(text)


def _yaml_block_scalar(text: str, indent: int) -> tuple[str, list[str]] | None:
    """Return ``(indicator, content_lines)`` for a literal block scalar.

    The indicator is ``|`` when the text ends with a newline and ``|-`` when
    it does not, so the final line break is neither invented nor lost.
    Returns ``None`` when the text cannot be represented faithfully this way
    (line breaks other than a plain newline, a blank or space-indented first
    line, or more than one trailing newline); callers then fall back to a
    quoted inline scalar.
    """
    body = text[:-1] if text.endswith("\n") else text
    rows = body.split("\n")
    if rows != body.splitlines():
        return None
    if not rows[0].strip() or rows[0].startswith(" ") or not rows[-1].strip():
        return None
    indicator = "|" if text.endswith("\n") else "|-"
    pad = " " * indent
    return indicator, [pad + row for row in rows]


def _yaml_emit(value: Any, indent: int = 0) -> list[str]:
    """Emit value as YAML lines, indented by ``indent`` spaces.

    Dicts and lists are written in block style (empty ones as ``{}`` / ``[]``),
    multi-line strings as literal block scalars where possible, everything
    else as inline scalars. Mapping keys are written as ``str(key)``.
    """
    pad = " " * indent

    if isinstance(value, dict):
        if not value:
            return [pad + "{}"]
        lines: list[str] = []
        for key, raw_val in value.items():
            key_text = str(key)
            if isinstance(raw_val, str) and "\n" in raw_val:
                block = _yaml_block_scalar(raw_val, indent + 2)
                if block is not None:
                    indicator, rows = block
                    lines.append(f"{pad}{key_text}: {indicator}")
                    lines.extend(rows)
                    continue
                # Not block-safe: falls through to the quoted inline form.
            if isinstance(raw_val, dict):
                if raw_val:
                    lines.append(f"{pad}{key_text}:")
                    lines.extend(_yaml_emit(raw_val, indent + 2))
                else:
                    lines.append(f"{pad}{key_text}: {{}}")
                continue
            if isinstance(raw_val, list):
                if raw_val:
                    lines.append(f"{pad}{key_text}:")
                    lines.extend(_yaml_emit(raw_val, indent + 2))
                else:
                    lines.append(f"{pad}{key_text}: []")
                continue
            lines.append(f"{pad}{key_text}: {_yaml_inline_scalar(raw_val)}")
        return lines

    if isinstance(value, list):
        if not value:
            return [pad + "[]"]
        lines = []
        for item in value:
            if isinstance(item, str) and "\n" in item:
                block = _yaml_block_scalar(item, indent + 2)
                if block is not None:
                    indicator, rows = block
                    lines.append(f"{pad}- {indicator}")
                    lines.extend(rows)
                    continue
                # Not block-safe: falls through to the quoted inline form.
            if isinstance(item, dict):
                if not item:
                    lines.append(f"{pad}- {{}}")
                else:
                    lines.append(f"{pad}-")
                    lines.extend(_yaml_emit(item, indent + 2))
                continue
            if isinstance(item, list):
                if not item:
                    lines.append(f"{pad}- []")
                else:
                    lines.append(f"{pad}-")
                    lines.extend(_yaml_emit(item, indent + 2))
                continue
            lines.append(f"{pad}- {_yaml_inline_scalar(item)}")
        return lines

    return [pad + _yaml_inline_scalar(value)]
test_flow_list_parses_as_list(self): - parsed = self.codexw._parse_simple_yaml( - """ -domains: - default: [core] - allowed: [core, testing] -""".strip() - ) - - self.assertEqual(parsed["domains"]["default"], ["core"]) - self.assertEqual(parsed["domains"]["allowed"], ["core", "testing"]) - - normalized = self.codexw.normalize_profile(parsed) - self.assertEqual(normalized["default_domains"], ["core"]) - self.assertEqual(normalized["allowed_domains"], ["core", "testing"]) - - def test_inline_comments_do_not_override_values(self): - parsed = self.codexw._parse_simple_yaml( - """ -review: - strict_gate: false # advisory mode - depth_hotspots: 1 # small changes only -domains: - default: [core] - allowed: [core] -""".strip() - ) - - self.assertFalse(parsed["review"]["strict_gate"]) - self.assertEqual(parsed["review"]["depth_hotspots"], 1) - - normalized = self.codexw.normalize_profile(parsed) - self.assertFalse(normalized["strict_gate"]) - self.assertEqual(normalized["depth_hotspots"], 1) - - def test_no_space_comment_after_closed_flow_and_quoted_scalars(self): - parsed = self.codexw._parse_simple_yaml( - """ -domains: - default: [core]# comment - allowed: [core, testing]# comment -repo: - name: 'Repo Name'# comment -prompts: - global: "line"# comment -meta: - link: https://example.com/#fragment -""".strip() - ) - - self.assertEqual(parsed["domains"]["default"], ["core"]) - self.assertEqual(parsed["domains"]["allowed"], ["core", "testing"]) - self.assertEqual(parsed["repo"]["name"], "Repo Name") - self.assertEqual(parsed["prompts"]["global"], "line") - self.assertEqual(parsed["meta"]["link"], "https://example.com/#fragment") - - normalized = self.codexw.normalize_profile(parsed) - self.assertEqual(normalized["default_domains"], ["core"]) - self.assertEqual(normalized["allowed_domains"], ["core", "testing"]) - self.assertEqual(normalized["repo_name"], "Repo Name") - - def test_list_item_with_colon_is_not_forced_to_inline_map(self): - parsed = 
self.codexw._parse_simple_yaml( - """ -values: - - https://example.com -""".strip() - ) - self.assertEqual(parsed["values"], ["https://example.com"]) - - def test_single_quote_escapes_in_flow_items(self): - parsed = self.codexw._parse_simple_yaml( - """ -values: - - ['it''s,ok', core] -""".strip() - ) - self.assertEqual(parsed["values"], [["it's,ok", "core"]]) - - def test_explicit_nulls_do_not_turn_into_empty_maps(self): - parsed = self.codexw._parse_simple_yaml( - """ -review: - default_base: - output_root: -domains: - default: [core] - allowed: [core] -pipeline: - core_passes: - - id: core-breadth - name: Core breadth - instructions: | - test -""".strip() - ) - - self.assertIsNone(parsed["review"]["default_base"]) - self.assertIsNone(parsed["review"]["output_root"]) - - normalized = self.codexw.normalize_profile(parsed) - self.assertEqual(normalized["default_base"], "main") - self.assertEqual(normalized["output_root"], ".codex/review-runs") - - def test_dump_yaml_round_trips_with_fallback_parser(self): - profile = { - "version": 1, - "repo": {"name": "Repo"}, - "review": {"default_base": "main", "strict_gate": True, "depth_hotspots": 2}, - "domains": {"default": ["core"], "allowed": ["core", "testing"]}, - "prompts": { - "global": "Line 1\nLine 2", - "by_domain": {"testing": "Focus on tests"}, - }, - "pipeline": { - "include_policy_pass": True, - "include_core_passes": True, - "include_domain_passes": True, - "include_depth_passes": True, - "core_passes": [ - { - "id": "core-breadth", - "name": "Core breadth", - "instructions": "Task:\n- cover all files", - } - ], - }, - } - - dumped = self.codexw._dump_yaml_text(profile) - parsed = self.codexw._parse_simple_yaml(dumped) - - self.assertEqual(parsed["domains"]["allowed"], ["core", "testing"]) - self.assertEqual(parsed["review"]["depth_hotspots"], 2) - self.assertEqual(parsed["prompts"]["global"], "Line 1\nLine 2") - - def test_default_domain_prompt_template_is_repo_agnostic(self): - prompt = 
self.codexw.default_domain_prompt_template("custom-domain") - self.assertIn("Domain focus: custom-domain", prompt) - self.assertIn("domain-specific correctness and policy compliance", prompt) - self.assertNotIn("FakeUsersRepository", prompt) - self.assertNotIn("Duolingo", prompt) - - def test_bootstrap_profile_uses_generic_domain_prompts(self): - with tempfile.TemporaryDirectory() as tmp: - repo_root = pathlib.Path(tmp) - rules_dir = repo_root / ".cursor" / "rules" - rules_dir.mkdir(parents=True, exist_ok=True) - (rules_dir / "testing-rule.mdc").write_text( - """--- -description: Testing conventions -domain: testing ---- -Use testing standards. -""", - encoding="utf-8", - ) - - profile = self.codexw.build_bootstrap_profile(repo_root) - self.assertIn("testing", profile["domains"]["allowed"]) - self.assertEqual( - profile["prompts"]["by_domain"]["testing"], - self.codexw.default_domain_prompt_template("testing"), - ) - - def test_extract_rule_domains_does_not_keyword_infer_from_description(self): - domains = self.codexw._extract_rule_domains( - {"description": "check experiment treatment and dispatcher usage"}, - "rules/misc-rule.mdc", - ) - self.assertEqual(domains, []) - - def test_infer_domains_from_rule_metadata_is_generic(self): - inferred = self.codexw.infer_domains_from_rule_metadata( - [ - {"domains": ["zeta"]}, - {"domains": ["alpha"]}, - ] - ) - self.assertEqual(inferred, ["core", "alpha", "zeta"]) - - -if __name__ == "__main__": - unittest.main() diff --git a/test/codexw_test.py b/test/codexw_test.py new file mode 100644 index 0000000..e3a9d33 --- /dev/null +++ b/test/codexw_test.py @@ -0,0 +1,763 @@ +#!/usr/bin/env python3 +"""Targeted regression tests for codexw.""" + +from __future__ import annotations + +import os +import pathlib +import shutil +import subprocess +import sys +import tempfile +import unittest +from contextlib import redirect_stderr +from io import StringIO +from unittest import mock + +# Add codexw to path +REPO_ROOT = 
pathlib.Path(__file__).resolve().parents[1] +sys.path.insert(0, str(REPO_ROOT)) + +from codexw.yaml_fallback import parse_simple_yaml, dump_yaml_text +from codexw.cli import build_parser +from codexw.git import collect_numstat +from codexw.profile import ( + normalize_profile, + default_domain_prompt_template, + build_bootstrap_profile, + infer_domains_from_rule_metadata, +) +from codexw.passes import ( + ModelFallbackState, + PassSpec, + RetryStrategy, + build_model_fallback_chain, + extract_configured_effort_from_output, + extract_supported_effort_from_output, + run_review_pass_with_compat, +) +from codexw.utils import CodexwError + + +class CodexwTests(unittest.TestCase): + _SKIP_PRE_COMMIT_INTEGRATION_ENV = "CODEXW_SKIP_PRECOMMIT_INTEGRATION" + + @staticmethod + def _resolve_pre_commit_cmd() -> list[str]: + binary = shutil.which("pre-commit") + if binary: + return [binary] + + probe = subprocess.run( + [sys.executable, "-m", "pre_commit", "--version"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + if probe.returncode == 0: + return [sys.executable, "-m", "pre_commit"] + + raise AssertionError( + "pre-commit is required for integration coverage. " + "Install pre-commit or make the pre_commit module available." 
+ ) + + @staticmethod + def _snapshot_working_tree_to_git_repo(target_repo_root: pathlib.Path) -> str: + tracked = subprocess.run( + ["git", "ls-files", "-z"], + cwd=REPO_ROOT, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=False, + ).stdout.decode("utf-8", errors="replace") + untracked = subprocess.run( + ["git", "ls-files", "--others", "--exclude-standard", "-z"], + cwd=REPO_ROOT, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=False, + ).stdout.decode("utf-8", errors="replace") + + paths = { + p + for p in (tracked + untracked).split("\x00") + if p and not p.endswith("/") + } + + target_repo_root.mkdir(parents=True, exist_ok=True) + for rel in sorted(paths): + src = REPO_ROOT / rel + if not src.is_file(): + continue + dst = target_repo_root / rel + dst.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(src, dst) + + subprocess.run( + ["git", "init"], + cwd=target_repo_root, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + subprocess.run(["git", "add", "."], cwd=target_repo_root, check=True) + subprocess.run( + [ + "git", + "-c", + "user.name=codexw-test", + "-c", + "user.email=codexw-test@example.com", + "commit", + "-m", + "snapshot", + ], + cwd=target_repo_root, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + return subprocess.run( + ["git", "rev-parse", "HEAD"], + cwd=target_repo_root, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ).stdout.strip() + + def test_flow_list_parses_as_list(self): + parsed = parse_simple_yaml( + """ +domains: + default: [core] + allowed: [core, testing] +""".strip() + ) + + self.assertEqual(parsed["domains"]["default"], ["core"]) + self.assertEqual(parsed["domains"]["allowed"], ["core", "testing"]) + + normalized = normalize_profile(parsed) + self.assertEqual(normalized["default_domains"], ["core"]) + self.assertEqual(normalized["allowed_domains"], ["core", "testing"]) 
+ + def test_inline_comments_do_not_override_values(self): + parsed = parse_simple_yaml( + """ +review: + strict_gate: false # advisory mode + depth_hotspots: 1 # small changes only +domains: + default: [core] + allowed: [core] +""".strip() + ) + + self.assertFalse(parsed["review"]["strict_gate"]) + self.assertEqual(parsed["review"]["depth_hotspots"], 1) + + normalized = normalize_profile(parsed) + self.assertFalse(normalized["strict_gate"]) + self.assertEqual(normalized["depth_hotspots"], 1) + + def test_no_space_comment_after_closed_flow_and_quoted_scalars(self): + parsed = parse_simple_yaml( + """ +domains: + default: [core]# comment + allowed: [core, testing]# comment +repo: + name: 'Repo Name'# comment +prompts: + global: "line"# comment +meta: + link: https://example.com/#fragment +""".strip() + ) + + self.assertEqual(parsed["domains"]["default"], ["core"]) + self.assertEqual(parsed["domains"]["allowed"], ["core", "testing"]) + self.assertEqual(parsed["repo"]["name"], "Repo Name") + self.assertEqual(parsed["prompts"]["global"], "line") + self.assertEqual(parsed["meta"]["link"], "https://example.com/#fragment") + + normalized = normalize_profile(parsed) + self.assertEqual(normalized["default_domains"], ["core"]) + self.assertEqual(normalized["allowed_domains"], ["core", "testing"]) + self.assertEqual(normalized["repo_name"], "Repo Name") + + def test_list_item_with_colon_is_not_forced_to_inline_map(self): + parsed = parse_simple_yaml( + """ +values: + - https://example.com +""".strip() + ) + self.assertEqual(parsed["values"], ["https://example.com"]) + + def test_malformed_mapping_indentation_raises(self): + malformed = """ +rules: + include: + - AGENTS.md + - .cursor/rules/**/*.mdc +""".strip() + with self.assertRaises(ValueError): + parse_simple_yaml(malformed) + + def test_malformed_nested_mapping_indentation_raises(self): + malformed = """ +review: + strict_gate: true + depth_hotspots: 3 +""".strip() + with self.assertRaises(ValueError): + 
parse_simple_yaml(malformed) + + def test_single_quote_escapes_in_flow_items(self): + parsed = parse_simple_yaml( + """ +values: + - ['it''s,ok', core] +""".strip() + ) + self.assertEqual(parsed["values"], [["it's,ok", "core"]]) + + def test_explicit_nulls_do_not_turn_into_empty_maps(self): + parsed = parse_simple_yaml( + """ +review: + default_base: + output_root: +domains: + default: [core] + allowed: [core] +pipeline: + core_passes: + - id: core-breadth + name: Core breadth + instructions: | + test +""".strip() + ) + + self.assertIsNone(parsed["review"]["default_base"]) + self.assertIsNone(parsed["review"]["output_root"]) + + normalized = normalize_profile(parsed) + self.assertEqual(normalized["default_base"], "main") + self.assertEqual(normalized["output_root"], ".codex/review-runs") + + def test_dump_yaml_round_trips_with_fallback_parser(self): + profile = { + "version": 1, + "repo": {"name": "Repo"}, + "review": {"default_base": "main", "strict_gate": True, "depth_hotspots": 2}, + "domains": {"default": ["core"], "allowed": ["core", "testing"]}, + "prompts": { + "global": "Line 1\nLine 2", + "by_domain": {"testing": "Focus on tests"}, + }, + "pipeline": { + "include_policy_pass": True, + "include_core_passes": True, + "include_domain_passes": True, + "include_depth_passes": True, + "core_passes": [ + { + "id": "core-breadth", + "name": "Core breadth", + "instructions": "Task:\n- cover all files", + } + ], + }, + } + + dumped = dump_yaml_text(profile) + parsed = parse_simple_yaml(dumped) + + self.assertEqual(parsed["domains"]["allowed"], ["core", "testing"]) + self.assertEqual(parsed["review"]["depth_hotspots"], 2) + self.assertEqual(parsed["prompts"]["global"], "Line 1\nLine 2") + + def test_default_domain_prompt_template_is_repo_agnostic(self): + prompt = default_domain_prompt_template("custom-domain") + self.assertIn("Domain focus: custom-domain", prompt) + self.assertIn("domain-specific correctness and policy compliance", prompt) + 
self.assertNotIn("FakeUsersRepository", prompt) + self.assertNotIn("Duolingo", prompt) + + def test_bootstrap_profile_uses_generic_domain_prompts(self): + with tempfile.TemporaryDirectory() as tmp: + repo_root = pathlib.Path(tmp) + rules_dir = repo_root / ".cursor" / "rules" + rules_dir.mkdir(parents=True, exist_ok=True) + (rules_dir / "testing-rule.mdc").write_text( + """--- +description: Testing conventions +domain: testing +--- +Use testing standards. +""", + encoding="utf-8", + ) + + profile = build_bootstrap_profile(repo_root) + self.assertIn("testing", profile["domains"]["allowed"]) + self.assertEqual( + profile["prompts"]["by_domain"]["testing"], + default_domain_prompt_template("testing"), + ) + + def test_bootstrap_profile_ignores_malformed_rule_frontmatter(self): + with tempfile.TemporaryDirectory() as tmp: + repo_root = pathlib.Path(tmp) + rules_dir = repo_root / ".cursor" / "rules" + rules_dir.mkdir(parents=True, exist_ok=True) + (rules_dir / "broken-rule.mdc").write_text( + """--- +domain: + - testing + - broken +--- +Body +""", + encoding="utf-8", + ) + profile = build_bootstrap_profile(repo_root) + self.assertIn("core", profile["domains"]["allowed"]) + + def test_infer_domains_from_rule_metadata_is_generic(self): + inferred = infer_domains_from_rule_metadata( + [ + {"domains": ["zeta"]}, + {"domains": ["alpha"]}, + ] + ) + self.assertEqual(inferred, ["core", "alpha", "zeta"]) + + def test_script_entrypoint_runs_from_external_cwd(self): + script_path = REPO_ROOT / "codexw" / "__main__.py" + with tempfile.TemporaryDirectory() as tmp: + proc = subprocess.run( + [str(script_path), "review", "--help"], + cwd=tmp, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + self.assertEqual(proc.returncode, 0, msg=proc.stderr) + self.assertIn("usage: codexw review", proc.stdout) + + def test_module_entrypoint_runs_from_repo_root(self): + proc = subprocess.run( + [sys.executable, "-m", "codexw", "review", "--help"], + cwd=str(REPO_ROOT), + 
stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + self.assertEqual(proc.returncode, 0, msg=proc.stderr) + self.assertIn("usage: codexw review", proc.stdout) + + def test_cli_rejects_conflicting_gate_flags(self): + parser = build_parser() + with redirect_stderr(StringIO()): + with self.assertRaises(SystemExit): + parser.parse_args( + ["review", "--fail-on-findings", "--no-fail-on-findings"] + ) + + def test_uncommitted_numstat_includes_untracked_files(self): + with tempfile.TemporaryDirectory() as tmp: + repo_root = pathlib.Path(tmp) + subprocess.run( + ["git", "init"], + cwd=repo_root, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + (repo_root / "tracked.txt").write_text("seed\n", encoding="utf-8") + subprocess.run(["git", "add", "tracked.txt"], cwd=repo_root, check=True) + subprocess.run( + [ + "git", + "-c", + "user.name=codexw-test", + "-c", + "user.email=codexw-test@example.com", + "commit", + "-m", + "seed", + ], + cwd=repo_root, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + (repo_root / "new_untracked.py").write_text( + "line1\nline2\nline3\n", + encoding="utf-8", + ) + + rows = collect_numstat(repo_root, mode="uncommitted", base="main", commit="") + by_path = {path: delta for delta, path in rows} + self.assertIn("new_untracked.py", by_path) + self.assertEqual(by_path["new_untracked.py"], 3) + + def test_pre_commit_hook_runs_pr_grade_wrapper_with_print_effective_profile(self): + skip_flag = os.environ.get(self._SKIP_PRE_COMMIT_INTEGRATION_ENV, "").strip().lower() + if skip_flag in {"1", "true", "yes", "on"}: + self.skipTest( + "Skipping pre-commit integration test because " + f"{self._SKIP_PRE_COMMIT_INTEGRATION_ENV}={skip_flag!r}" + ) + + pre_commit_cmd = self._resolve_pre_commit_cmd() + + with tempfile.TemporaryDirectory() as tmp: + tmp_root = pathlib.Path(tmp) + hook_repo_root = tmp_root / "hook-repo" + consumer_root = tmp_root / "consumer-repo" + + repo_rev = 
self._snapshot_working_tree_to_git_repo(hook_repo_root) + consumer_root.mkdir(parents=True, exist_ok=True) + + subprocess.run( + ["git", "init"], + cwd=consumer_root, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + (consumer_root / "sample.txt").write_text("sample\n", encoding="utf-8") + subprocess.run(["git", "add", "sample.txt"], cwd=consumer_root, check=True) + subprocess.run( + [ + "git", + "-c", + "user.name=codexw-test", + "-c", + "user.email=codexw-test@example.com", + "commit", + "-m", + "seed", + ], + cwd=consumer_root, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + (consumer_root / ".pre-commit-config.yaml").write_text( + ( + "repos:\n" + f" - repo: {hook_repo_root}\n" + f" rev: {repo_rev}\n" + " hooks:\n" + " - id: codex-review-pr-grade\n" + " args:\n" + " - --print-effective-profile\n" + ), + encoding="utf-8", + ) + + proc = subprocess.run( + [ + *pre_commit_cmd, + "run", + "codex-review-pr-grade", + "--all-files", + "--hook-stage", + "manual", + ], + cwd=consumer_root, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + if proc.returncode != 0: + self.fail( + "pre-commit codex-review-pr-grade hook failed.\n" + f"stdout:\n{proc.stdout}\n" + f"stderr:\n{proc.stderr}" + ) + + self.assertIn("Codex AI Code Review (PR-grade)", proc.stdout) + self.assertIn('"effective_profile"', proc.stdout) + self.assertTrue((consumer_root / "local-review-profile.yaml").is_file()) + + def test_recursive_model_fallback_chain(self): + chain = build_model_fallback_chain("gpt-5.3-codex") + self.assertEqual( + chain, + [ + "gpt-5.3-codex", + "gpt-5.2-codex", + "gpt-5.1-codex", + "gpt-5-codex", + "gpt-4.2-codex", + ], + ) + self.assertEqual(len(chain), len(set(chain))) + + def test_pass_retry_falls_back_model_then_effort(self): + pass_spec = PassSpec( + id="pass-1-policy-sweep", + name="Policy pass", + prompt="Run policy review", + ) + model_state = ModelFallbackState() + + with 
tempfile.TemporaryDirectory() as tmp: + repo_root = pathlib.Path(tmp) + out_file = repo_root / "pass.md" + calls: list[list[str]] = [] + responses = [ + ( + 1, + "error: model_not_found: The model `gpt-5.3-codex` " + "does not exist or you do not have access to it.", + ), + ( + 1, + "error: model_reasoning_effort 'xhigh' is not supported for this model. " + "Supported values: high, medium, low.", + ), + (0, "No actionable findings."), + ] + + def fake_run(cmd, cwd, write_to, stream_output): + _ = cwd, stream_output + calls.append(cmd) + exit_code, text = responses[len(calls) - 1] + write_to.write_text(text, encoding="utf-8") + return exit_code + + with mock.patch("codexw.passes.run_captured", side_effect=fake_run): + run_review_pass_with_compat( + repo_root=repo_root, + out_file=out_file, + target_args=["--uncommitted"], + target_desc="uncommitted changes", + pass_spec=pass_spec, + model_state=model_state, + ) + + self.assertEqual(len(calls), 3) + self.assertNotIn('model="', " ".join(calls[0])) + self.assertIn('model="gpt-5.2-codex"', " ".join(calls[1])) + self.assertIn('model="gpt-5.2-codex"', " ".join(calls[2])) + self.assertIn('model_reasoning_effort="high"', " ".join(calls[2])) + self.assertEqual(model_state.selected_model, "gpt-5.2-codex") + self.assertEqual(model_state.selected_effort, "high") + + def test_pass_retry_reuses_resolved_model_and_effort(self): + pass_spec = PassSpec( + id="pass-2-core-breadth", + name="Core breadth", + prompt="Run core pass", + ) + model_state = ModelFallbackState( + preferred_model="gpt-5.3-codex", + selected_model="gpt-5.2-codex", + selected_effort="high", + ) + + with tempfile.TemporaryDirectory() as tmp: + repo_root = pathlib.Path(tmp) + out_file = repo_root / "pass.md" + calls: list[list[str]] = [] + + def fake_run(cmd, cwd, write_to, stream_output): + _ = cwd, stream_output + calls.append(cmd) + write_to.write_text("No actionable findings.", encoding="utf-8") + return 0 + + with mock.patch("codexw.passes.run_captured", 
side_effect=fake_run): + run_review_pass_with_compat( + repo_root=repo_root, + out_file=out_file, + target_args=["--base", "main"], + target_desc="base branch: main", + pass_spec=pass_spec, + model_state=model_state, + ) + + self.assertEqual(len(calls), 1) + joined = " ".join(calls[0]) + self.assertIn('model="gpt-5.2-codex"', joined) + self.assertIn('model_reasoning_effort="high"', joined) + + def test_pass_retry_uses_recent_five_model_window(self): + pass_spec = PassSpec( + id="pass-3-policy-sweep", + name="Policy pass recent chain", + prompt="Run policy review", + ) + model_state = ModelFallbackState(preferred_model="gpt-9.9-codex") + + with tempfile.TemporaryDirectory() as tmp: + repo_root = pathlib.Path(tmp) + out_file = repo_root / "pass.md" + calls: list[list[str]] = [] + + fallback_chain = build_model_fallback_chain("gpt-9.9-codex") + final_model = fallback_chain[-1] + + def fake_run(cmd, cwd, write_to, stream_output): + _ = cwd, stream_output + calls.append(cmd) + joined = " ".join(cmd) + if f'model="{final_model}"' in joined: + write_to.write_text("No actionable findings.", encoding="utf-8") + return 0 + + current_model = None + for model in fallback_chain: + if f'model="{model}"' in joined: + current_model = model + break + if current_model is None: + current_model = fallback_chain[0] + + write_to.write_text( + "error: model_not_found: The model " + f"`{current_model}` does not exist or you do not have access to it.", + encoding="utf-8", + ) + return 1 + + with mock.patch("codexw.passes.run_captured", side_effect=fake_run): + run_review_pass_with_compat( + repo_root=repo_root, + out_file=out_file, + target_args=["--uncommitted"], + target_desc="uncommitted changes", + pass_spec=pass_spec, + model_state=model_state, + ) + + # Recency policy limits fallback to latest five candidate models. 
+ self.assertEqual(len(calls), 5) + self.assertIn(f'model="{final_model}"', " ".join(calls[-1])) + self.assertEqual(model_state.selected_model, final_model) + + def test_pass_retry_does_not_slide_beyond_recent_five_window(self): + pass_spec = PassSpec( + id="pass-4-policy-sweep", + name="Policy pass fixed window", + prompt="Run policy review", + ) + model_state = ModelFallbackState(preferred_model="gpt-9.9-codex") + + with tempfile.TemporaryDirectory() as tmp: + repo_root = pathlib.Path(tmp) + out_file = repo_root / "pass.md" + calls: list[list[str]] = [] + expected_chain = build_model_fallback_chain("gpt-9.9-codex") + + def fake_run(cmd, cwd, write_to, stream_output): + _ = cwd, stream_output + calls.append(cmd) + joined = " ".join(cmd) + + current_model = expected_chain[0] + for model in expected_chain: + if f'model="{model}"' in joined: + current_model = model + break + + write_to.write_text( + "error: model_not_found: The model " + f"`{current_model}` does not exist or you do not have access to it.", + encoding="utf-8", + ) + return 1 + + with mock.patch("codexw.passes.run_captured", side_effect=fake_run): + with self.assertRaises(CodexwError): + run_review_pass_with_compat( + repo_root=repo_root, + out_file=out_file, + target_args=["--uncommitted"], + target_desc="uncommitted changes", + pass_spec=pass_spec, + model_state=model_state, + ) + + self.assertEqual(len(calls), len(expected_chain)) + for idx, model in enumerate(expected_chain): + self.assertIn(f'model="{model}"', " ".join(calls[idx])) + self.assertIsNone(model_state.selected_model) + + def test_previous_major_includes_dot_two_variant(self): + chain = build_model_fallback_chain("gpt-4.2-codex") + self.assertEqual( + chain, + [ + "gpt-4.2-codex", + "gpt-4.1-codex", + "gpt-4-codex", + "gpt-3.2-codex", + "gpt-3.1-codex", + ], + ) + + def test_reasoning_effort_unsupported_detects_reasoning_dot_effort_param(self): + output = """ +ERROR: { + "error": { + "message": "Unsupported value: 'xhigh' is not 
supported with the 'gpt-5-codex' model. Supported values are: 'low', 'medium', and 'high'.", + "type": "invalid_request_error", + "param": "reasoning.effort", + "code": "unsupported_value" + } +} +""".strip() + self.assertTrue(RetryStrategy.reasoning_effort_unsupported(output)) + self.assertEqual(extract_supported_effort_from_output(output), "high") + + def test_extract_configured_effort_from_reasoning_effort_banner(self): + output = """ +model: gpt-5-codex +reasoning effort: xhigh +""".strip() + self.assertEqual(extract_configured_effort_from_output(output), "xhigh") + + def test_model_unavailable_detects_chatgpt_model_not_supported_message(self): + output = ( + "ERROR: {\"detail\":\"The 'gpt-0-codex' model is not supported when using Codex " + "with a ChatGPT account.\"}" + ) + self.assertTrue(RetryStrategy.model_unavailable(output)) + + def test_model_unavailable_does_not_trigger_on_reasoning_effort_error(self): + output = """ +ERROR: { + "error": { + "message": "Unsupported value: 'xhigh' is not supported with the 'gpt-5-codex' model.", + "param": "reasoning.effort", + "code": "unsupported_value" + } +} +""".strip() + self.assertFalse(RetryStrategy.model_unavailable(output)) + + +if __name__ == "__main__": + unittest.main() From 14efa049c4edfdc4dbe0fd2bfbece2448c92248c Mon Sep 17 00:00:00 2001 From: pavan Date: Tue, 24 Feb 2026 00:16:53 +0530 Subject: [PATCH 7/7] Resolve PR review findings for base ref fallback and report scoping --- .pre-commit-hooks.yaml | 17 ++++++++ README.md | 8 +++- codexw/__main__.py | 8 +++- codexw/features-and-usecases.md | 2 +- codexw/git.py | 40 ++++++++++++----- codexw/passes.py | 11 +++-- codexw/reporting.py | 6 +-- test/codexw_test.py | 76 ++++++++++++++++++++++++++++++--- 8 files changed, 141 insertions(+), 27 deletions(-) diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index d938faf..90c0bde 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -35,6 +35,15 @@ stages: [manual] verbose: true +- id: 
codexw + name: Codexw (alias) + description: Alias for codex-review-pr-grade. + entry: ./codexw/__main__.py review + language: script + pass_filenames: false + stages: [manual] + verbose: true + # Dev hooks for testing PRs in the duolingo/pre-commit-hooks repo. # Usage: edit a consumer repo's hook config to declare `id: duolingo-dev` # and `rev: `, then run `pre-commit run --all-files` @@ -66,3 +75,11 @@ pass_filenames: false stages: [manual] verbose: true + +- id: codexw-dev + name: Codexw (alias, dev) + entry: ./codexw/__main__.py review + language: script + pass_filenames: false + stages: [manual] + verbose: true diff --git a/README.md b/README.md index 956350a..e05d1b5 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ codex review --uncommitted codex review --base master ``` -## Codex PR-grade Hook (`codex-review-pr-grade`) +## Codex PR-grade Hook (`codex-review-pr-grade`, alias: `codexw`) Profile-aware multi-pass local review using `codexw`. This hook is also `manual` by default and does not block normal commits. 
@@ -108,9 +108,11 @@ PR-grade outputs include: ```bash # Run PR-grade review for current diff vs profile default base branch pre-commit run codex-review-pr-grade +pre-commit run codexw # Run PR-grade review for all files (still uses profile + pass orchestration) pre-commit run codex-review-pr-grade --all-files +pre-commit run codexw --all-files ``` Direct execution (without pre-commit): @@ -279,7 +281,7 @@ Feature/use-case guide: - `codexw/architecture.md` (internal architecture) Hook id for pre-commit: -`codex-review-pr-grade` +`codex-review-pr-grade` (canonical), `codexw` (alias) ## Usage @@ -304,6 +306,8 @@ Repo maintainers can declare these hooks in `.pre-commit-config.yaml`: - id: codex-review # On-demand PR-grade Codex review (manual stage, profile-aware) - id: codex-review-pr-grade + # Equivalent alias for PR-grade Codex review (manual stage, profile-aware) + - id: codexw ``` Directories named `build` and `node_modules` are excluded by default - no need to declare them in the hook's `exclude` key. 
diff --git a/codexw/__main__.py b/codexw/__main__.py index 866836e..e9c227c 100755 --- a/codexw/__main__.py +++ b/codexw/__main__.py @@ -35,6 +35,7 @@ collect_changed_files, collect_numstat, find_repo_root, + resolve_base_ref, ) from codexw.passes import PassBuilder, PassRunner from codexw.profile import ( @@ -60,6 +61,7 @@ collect_changed_files, collect_numstat, find_repo_root, + resolve_base_ref, ) from .passes import PassBuilder, PassRunner from .profile import ( @@ -176,6 +178,8 @@ def run_review(args) -> int: mode = "uncommitted" elif args.commit: mode = "commit" + elif mode == "base": + base_branch = resolve_base_ref(repo_root, base_branch) # Determine gating mode fail_on_findings = profile["strict_gate"] @@ -268,7 +272,7 @@ def run_review(args) -> int: target_desc=target_desc, model_override=model_override or None, ) - summary_lines, raw_findings = pass_runner.run_all(passes) + summary_lines, raw_findings, executed_pass_files = pass_runner.run_all(passes) # Write support files write_support_files( @@ -302,7 +306,7 @@ def run_review(args) -> int: summary_lines=summary_lines, raw_findings=raw_findings, findings_json_path=findings_json, - output_root=output_root, + executed_pass_files=executed_pass_files, title=args.title, model_override=model_override, ) diff --git a/codexw/features-and-usecases.md b/codexw/features-and-usecases.md index b309f99..7d18166 100644 --- a/codexw/features-and-usecases.md +++ b/codexw/features-and-usecases.md @@ -8,7 +8,7 @@ This document describes the local review capabilities provided by `codexw` and a Runs plain `codex review` from manual pre-commit stage for fast local sanity checks before push. Why it matters: lowest-latency feedback path. -2. **PR-grade review (`codex-review-pr-grade`)** +2. **PR-grade review (`codex-review-pr-grade`, alias `codexw`)** Runs `./codexw/__main__.py review` with profile-driven multi-pass orchestration. Why it matters: deeper and more consistent review than one-shot prompts. 
diff --git a/codexw/git.py b/codexw/git.py index d505a5b..6f50716 100644 --- a/codexw/git.py +++ b/codexw/git.py @@ -70,22 +70,41 @@ def git_ref_exists(repo_root: Path, ref: str) -> bool: def detect_default_base(repo_root: Path) -> str: """Detect the default base branch (master or main).""" - # Check local branches first, then remote - candidates = ["master", "main"] - ref_types = ["refs/heads/{}", "refs/remotes/origin/{}"] - - for ref_template in ref_types: - for candidate in candidates: - if git_ref_exists(repo_root, ref_template.format(candidate)): - return candidate + # Prefer local branches; if only remote-tracking exists, return + # remote-qualified ref so diff commands remain valid in detached clones. + for candidate in ("master", "main"): + if git_ref_exists(repo_root, f"refs/heads/{candidate}"): + return candidate + for candidate in ("master", "main"): + if git_ref_exists(repo_root, f"refs/remotes/origin/{candidate}"): + return f"origin/{candidate}" return "main" +def resolve_base_ref(repo_root: Path, base: str) -> str: + """Resolve branch-like base to a usable git ref. + + If the requested base is a plain branch name and no local branch exists + but `origin/<base>` does, return `origin/<base>`. 
+ """ + raw = str(base).strip() + if not raw: + return raw + if "/" in raw: + return raw + if git_ref_exists(repo_root, f"refs/heads/{raw}"): + return raw + if git_ref_exists(repo_root, f"refs/remotes/origin/{raw}"): + return f"origin/{raw}" + return raw + + def collect_changed_files(repo_root: Path, mode: str, base: str, commit: str) -> list[str]: """Collect list of changed files based on review mode.""" if mode == "base": - out = run_checked(["git", "diff", "--name-only", f"{base}...HEAD"], repo_root) + base_ref = resolve_base_ref(repo_root, base) + out = run_checked(["git", "diff", "--name-only", f"{base_ref}...HEAD"], repo_root) return sorted({line.strip() for line in out.splitlines() if line.strip()}) if mode == "uncommitted": @@ -103,7 +122,8 @@ def collect_changed_files(repo_root: Path, mode: str, base: str, commit: str) -> def collect_numstat(repo_root: Path, mode: str, base: str, commit: str) -> list[tuple[int, str]]: """Collect file change statistics (added + deleted lines per file).""" if mode == "base": - cmd = ["git", "diff", "--numstat", f"{base}...HEAD"] + base_ref = resolve_base_ref(repo_root, base) + cmd = ["git", "diff", "--numstat", f"{base_ref}...HEAD"] elif mode == "uncommitted": cmd = ["git", "diff", "--numstat", "HEAD"] elif mode == "commit": diff --git a/codexw/passes.py b/codexw/passes.py index f8e8e65..ee206f4 100644 --- a/codexw/passes.py +++ b/codexw/passes.py @@ -571,13 +571,18 @@ def __init__( preferred_model=normalize_model_name(model_override), ) - def run_all(self, passes: list[PassSpec]) -> tuple[list[str], list[dict[str, Any]]]: - """Run all passes, return (summary_lines, raw_findings).""" + def run_all( + self, + passes: list[PassSpec], + ) -> tuple[list[str], list[dict[str, Any]], list[Path]]: + """Run all passes, return (summary_lines, raw_findings, executed_pass_files).""" summary_lines: list[str] = [] raw_findings: list[dict[str, Any]] = [] + executed_pass_files: list[Path] = [] for index, pass_spec in enumerate(passes, 
start=1): out_file = self.output_root / f"{pass_spec.id}.md" + executed_pass_files.append(out_file) print(f"\n==> ({index}/{len(passes)}) {pass_spec.name}") run_review_pass_with_compat( @@ -616,4 +621,4 @@ def run_all(self, passes: list[PassSpec]) -> tuple[list[str], list[dict[str, Any summary_lines.append(f"- [FINDINGS] {pass_spec.name}") raw_findings.extend(parsed) - return summary_lines, raw_findings + return summary_lines, raw_findings, executed_pass_files diff --git a/codexw/reporting.py b/codexw/reporting.py index 96427ed..716ebab 100644 --- a/codexw/reporting.py +++ b/codexw/reporting.py @@ -56,7 +56,7 @@ def write_combined_report( summary_lines: list[str], raw_findings: list[dict[str, Any]], findings_json_path: Path, - output_root: Path, + executed_pass_files: list[Path], title: str | None = None, model_override: str | None = None, ) -> None: @@ -107,8 +107,8 @@ def write_combined_report( fh.write("## Hotspots\n\n") fh.write(("\n".join(hotspots) if hotspots else "(none)") + "\n\n") - # Append per-pass outputs - for pass_file in sorted(output_root.glob("pass-*.md")): + # Append outputs from passes executed in this run only. 
+ for pass_file in executed_pass_files: fh.write(f"## {pass_file.stem}\n\n") pass_text = pass_file.read_text(encoding="utf-8") fh.write(pass_text) diff --git a/test/codexw_test.py b/test/codexw_test.py index e3a9d33..7420200 100644 --- a/test/codexw_test.py +++ b/test/codexw_test.py @@ -20,13 +20,14 @@ from codexw.yaml_fallback import parse_simple_yaml, dump_yaml_text from codexw.cli import build_parser -from codexw.git import collect_numstat +from codexw.git import collect_numstat, detect_default_base, resolve_base_ref from codexw.profile import ( normalize_profile, default_domain_prompt_template, build_bootstrap_profile, infer_domains_from_rule_metadata, ) +from codexw.reporting import write_combined_report from codexw.passes import ( ModelFallbackState, PassSpec, @@ -415,7 +416,70 @@ def test_uncommitted_numstat_includes_untracked_files(self): self.assertIn("new_untracked.py", by_path) self.assertEqual(by_path["new_untracked.py"], 3) - def test_pre_commit_hook_runs_pr_grade_wrapper_with_print_effective_profile(self): + def test_detect_default_base_returns_remote_qualified_ref_when_local_missing(self): + def fake_ref_exists(_repo_root, ref): + return ref == "refs/remotes/origin/main" + + with mock.patch("codexw.git.git_ref_exists", side_effect=fake_ref_exists): + self.assertEqual(detect_default_base(REPO_ROOT), "origin/main") + + def test_resolve_base_ref_prefers_local_branch_over_remote(self): + def fake_ref_exists(_repo_root, ref): + return ref in {"refs/heads/main", "refs/remotes/origin/main"} + + with mock.patch("codexw.git.git_ref_exists", side_effect=fake_ref_exists): + self.assertEqual(resolve_base_ref(REPO_ROOT, "main"), "main") + + def test_resolve_base_ref_maps_to_origin_when_only_remote_exists(self): + def fake_ref_exists(_repo_root, ref): + return ref == "refs/remotes/origin/main" + + with mock.patch("codexw.git.git_ref_exists", side_effect=fake_ref_exists): + self.assertEqual(resolve_base_ref(REPO_ROOT, "main"), "origin/main") + + def 
test_combined_report_appends_only_executed_pass_files(self): + with tempfile.TemporaryDirectory() as tmp: + repo_root = pathlib.Path(tmp) + output_root = repo_root / "out" + output_root.mkdir(parents=True, exist_ok=True) + profile_path = repo_root / "local-review-profile.yaml" + profile_path.write_text("version: 1\n", encoding="utf-8") + + current_pass = output_root / "pass-1-current.md" + stale_pass = output_root / "pass-2-stale.md" + pass_status = output_root / "pass-status.md" + combined = output_root / "combined-report.md" + findings_json = output_root / "findings.json" + + current_pass.write_text("Current pass output\n", encoding="utf-8") + stale_pass.write_text("Stale pass output\n", encoding="utf-8") + pass_status.write_text("- [PASS] status\n", encoding="utf-8") + + write_combined_report( + path=combined, + profile={"repo_name": "Repo"}, + profile_path=profile_path, + repo_root=repo_root, + target_desc="base branch: main", + selected_domains=["core"], + rule_files=[], + changed_files=["a.py"], + modules=[(1, "a.py")], + hotspots=[], + depth_hotspots=0, + pass_count=1, + summary_lines=["- [PASS] current"], + raw_findings=[], + findings_json_path=findings_json, + executed_pass_files=[current_pass], + ) + + report = combined.read_text(encoding="utf-8") + self.assertIn("## pass-1-current", report) + self.assertNotIn("## pass-2-stale", report) + self.assertNotIn("## pass-status", report) + + def test_pre_commit_hook_runs_codexw_alias_with_print_effective_profile(self): skip_flag = os.environ.get(self._SKIP_PRE_COMMIT_INTEGRATION_ENV, "").strip().lower() if skip_flag in {"1", "true", "yes", "on"}: self.skipTest( @@ -467,7 +531,7 @@ def test_pre_commit_hook_runs_pr_grade_wrapper_with_print_effective_profile(self f" - repo: {hook_repo_root}\n" f" rev: {repo_rev}\n" " hooks:\n" - " - id: codex-review-pr-grade\n" + " - id: codexw\n" " args:\n" " - --print-effective-profile\n" ), @@ -478,7 +542,7 @@ def 
test_pre_commit_hook_runs_pr_grade_wrapper_with_print_effective_profile(self [ *pre_commit_cmd, "run", - "codex-review-pr-grade", + "codexw", "--all-files", "--hook-stage", "manual", @@ -491,12 +555,12 @@ def test_pre_commit_hook_runs_pr_grade_wrapper_with_print_effective_profile(self if proc.returncode != 0: self.fail( - "pre-commit codex-review-pr-grade hook failed.\n" + "pre-commit codexw hook failed.\n" f"stdout:\n{proc.stdout}\n" f"stderr:\n{proc.stderr}" ) - self.assertIn("Codex AI Code Review (PR-grade)", proc.stdout) + self.assertIn("Codexw (alias)", proc.stdout) self.assertIn('"effective_profile"', proc.stdout) self.assertTrue((consumer_root / "local-review-profile.yaml").is_file())