diff --git a/cli/src/commands/charter/audit.rs b/cli/src/commands/charter/audit.rs index 1aaa9d6..7b223a4 100644 --- a/cli/src/commands/charter/audit.rs +++ b/cli/src/commands/charter/audit.rs @@ -78,15 +78,16 @@ pub fn run( path: &str, charter_id: &str, range: Option<&str>, + prepare: bool, + merge_reports: bool, calibrate: bool, finalize: bool, merge_into: Option<&str>, ) -> Result<()> { - if calibrate && finalize { - bail!("--calibrate and --finalize are mutually exclusive — run one at a time"); - } - if merge_into.is_some() && !finalize { - bail!("--merge-into is only valid with --finalize"); + // --merge-into only makes sense with --merge-reports (or the deprecated + // --finalize that aliases to it). + if merge_into.is_some() && !merge_reports && !finalize { + bail!("--merge-into is only valid with --merge-reports (or the deprecated --finalize)"); } let resolved = utils::resolve_project_root(path) @@ -102,20 +103,41 @@ pub fn run( let canonical_id = canonical_charter_id(&charter.frontmatter.charter_id); - let audit_dir = project_root - .join("audit") - .join("charters") - .join(&canonical_id); - let prompts_dir = audit_dir.join("prompts"); - utils::ensure_dir(&prompts_dir)?; + // v1 canonical path: .devtrail/audits//. The audit-prompt is + // written directly to this dir; reports land here as report-*.md; the + // future review.md consolidated by the audit-review skill lands here too. + let audit_dir = devtrail_dir.join("audits").join(&canonical_id); + utils::ensure_dir(&audit_dir)?; let range = match range { Some(r) => r.to_string(), None => resolve_default_range(project_root), }; + // Deprecated v0 flag: --calibrate. v1 has no separate calibrate step + // (the main agent fills the calibrator role via /devtrail-audit-review + // skill). Emit guidance and exit nonzero so callers notice. + if calibrate { + eprintln!( + "{} --calibrate was the v0 way to resolve the calibrator prompt. \ + v1 of the audit flow eliminates that step — the main agent \ + reconciles N reports inline via the /devtrail-audit-review skill. \ + To merge reports into telemetry, use --merge-reports.", + "warn:".yellow().bold() + ); + bail!("--calibrate is no longer supported in the v1 audit flow"); + } + + // Deprecated v0 alias: --finalize → --merge-reports. if finalize { - return run_finalize( + eprintln!( + "{} --finalize is the v0 name for the merge step. The v1 \ + equivalent is --merge-reports (now reading N reports from \ + {}/report-*.md instead of two fixed files).", + "warn:".yellow().bold(), + relative_path(project_root, &audit_dir).display() + ); + return run_merge_reports( project_root, &devtrail_dir, &audit_dir, @@ -124,24 +146,22 @@ pub fn run( merge_into.map(Path::new), ); } - if calibrate { - return run_calibrate( + + if merge_reports { + return run_merge_reports( project_root, &devtrail_dir, &audit_dir, - &prompts_dir, &charter, - &range, + &canonical_id, + merge_into.map(Path::new), ); } - run_prepare( - project_root, - &devtrail_dir, - &audit_dir, - &prompts_dir, - &charter, - &range, - ) + + // Default action: prepare. The --prepare flag is accepted for + // self-documenting invocations but is also the implicit default. + let _ = prepare; + run_prepare(project_root, &devtrail_dir, &audit_dir, &charter, &range) } // ── Step 1: prepare ──────────────────────────────────────────────────────── @@ -150,150 +170,31 @@ fn run_prepare( project_root: &Path, devtrail_dir: &Path, audit_dir: &Path, - prompts_dir: &Path, charter: &Charter, range: &str, ) -> Result<()> { println!( "{} {} ({})", - "Step 1/3:".cyan().bold(), - "PREPARE".bold(), + "PREPARE".cyan().bold(), + "audit prompt".bold(), charter.frontmatter.charter_id.dimmed() ); let context = build_audit_context(project_root, charter, range)?; - for role in ["auditor-primary", "auditor-secondary"] { - let template_path = devtrail_dir - .join("audit-prompts") - .join(format!("{role}.md")); - let template = std::fs::read_to_string(&template_path).with_context(|| { - format!( - "Audit prompt template not found at {}. Run `devtrail repair` to restore framework files.", - template_path.display() - ) - })?; - let resolved = resolve_audit_template(&template, &context, role); - let out = prompts_dir.join(format!("{role}.prompt.md")); - std::fs::write(&out, resolved) - .with_context(|| format!("Failed to write resolved prompt to {}", out.display()))?; - println!( - " {} Wrote {}", - "✔".green().bold(), - relative_path(project_root, &out).display() - ); - } - - println!(); - println!(" {}", "Next:".bold()); - println!(" 1. Paste each prompt into your auditor of choice (use a model"); - println!(" of a different family per auditor — see CLI-REFERENCE)."); - println!(" 2. Save the auditor responses to:"); - println!( - " {}", - audit_dir - .join("auditor-primary.md") - .strip_prefix(project_root) - .unwrap_or_else(|_| audit_dir.as_ref()) - .display() - ); - println!( - " {}", - audit_dir - .join("auditor-secondary.md") - .strip_prefix(project_root) - .unwrap_or_else(|_| audit_dir.as_ref()) - .display() - ); - println!( - " 3. Run: {} {} --calibrate", - "devtrail charter audit".cyan(), - charter.frontmatter.charter_id.cyan() - ); - Ok(()) -} - -// ── Step 2: calibrate ────────────────────────────────────────────────────── - -fn run_calibrate( - project_root: &Path, - devtrail_dir: &Path, - audit_dir: &Path, - prompts_dir: &Path, - charter: &Charter, - range: &str, -) -> Result<()> { - println!( - "{} {} ({})", - "Step 2/3:".cyan().bold(), - "CALIBRATE".bold(), - charter.frontmatter.charter_id.dimmed() - ); - - let primary_path = audit_dir.join("auditor-primary.md"); - let secondary_path = audit_dir.join("auditor-secondary.md"); - - for (role, path) in [ - ("auditor-primary", &primary_path), - ("auditor-secondary", &secondary_path), - ] { - if !path.exists() { - bail!( - "{} not found. Save the {} response to that path before running --calibrate.", - path.display(), - role - ); - } - } - - let schema = AuditOutputSchema::load(devtrail_dir)?; - for path in [&primary_path, &secondary_path] { - let raw = std::fs::read_to_string(path) - .with_context(|| format!("Failed to read {}", path.display()))?; - let frontmatter = parse_frontmatter(&raw) - .with_context(|| format!("Failed to parse frontmatter in {}", path.display()))?; - let issues = schema.validate(&frontmatter, path); - if !issues.is_empty() { - eprintln!( - "{} validation issues in {}:", - "error:".red().bold(), - path.display() - ); - for issue in &issues { - eprintln!(" - {} [{}]", issue.message, issue.rule); - if let Some(hint) = &issue.fix_hint { - eprintln!(" {} {}", "hint:".cyan(), hint); - } - } - bail!("auditor output failed schema validation"); - } - println!( - " {} Validated {}", - "✔".green().bold(), - relative_path(project_root, path).display() - ); - } - - let primary_body = std::fs::read_to_string(&primary_path)?; - let secondary_body = std::fs::read_to_string(&secondary_path)?; - - let mut context = build_audit_context(project_root, charter, range)?; - context.auditor_primary_findings = primary_body; - context.auditor_secondary_findings = secondary_body; - let template_path = devtrail_dir .join("audit-prompts") - .join("calibrator-reconciler.md"); + .join("audit-prompt.md"); let template = std::fs::read_to_string(&template_path).with_context(|| { format!( - "Calibrator prompt template not found at {}. Run `devtrail repair`.", + "Audit prompt template not found at {}. Run `devtrail repair` to restore framework files.", template_path.display() ) })?; - let resolved = resolve_audit_template(&template, &context, "calibrator-reconciler"); - let out = prompts_dir.join("calibrator-reconciler.prompt.md"); + let resolved = resolve_audit_template(&template, &context, "auditor"); + let out = audit_dir.join("audit-prompt.md"); std::fs::write(&out, resolved) - .with_context(|| format!("Failed to write {}", out.display()))?; + .with_context(|| format!("Failed to write resolved prompt to {}", out.display()))?; println!( " {} Wrote {}", "✔".green().bold(), @@ -303,28 +204,46 @@ fn run_calibrate( println!(); println!(" {}", "Next:".bold()); println!( - " 1. Run the calibrator prompt in a model of your choice (calibrator may"); - println!(" be of any family per roadmap §5.2 — heterogeneity is for the"); - println!(" auditor pair, not the calibrator)."); + " 1. Open one or more auditor CLIs (gemini-cli, claude-cli, copilot-cli, etc.)" + ); + println!(" in this repo and invoke {} in each.", + format!("/devtrail-audit-execute {}", charter.frontmatter.charter_id).cyan()); println!( - " 2. Save the response to: {}", + " Recommended: at least 2 auditors of different model families." + ); + println!(" 2. Each auditor reads the prompt above, audits with tool use,"); + println!(" and writes its report to:"); + println!( + " {}", audit_dir - .join("calibrator-reconciler.md") + .join("report-.md") .strip_prefix(project_root) .unwrap_or_else(|_| audit_dir.as_ref()) .display() ); println!( - " 3. Run: {} {} --finalize", - "devtrail charter audit".cyan(), - charter.frontmatter.charter_id.cyan() + " 3. When ALL audits you commissioned have finished (NOT before)," ); + println!( + " return to this agent and run: {}", + format!("/devtrail-audit-review {}", charter.frontmatter.charter_id).cyan() + ); + println!(" 4. The review skill consolidates the reports and merges YAML"); + println!(" into telemetry."); Ok(()) } -// ── Step 3: finalize ─────────────────────────────────────────────────────── - -fn run_finalize( +// ── Step 2: merge reports ────────────────────────────────────────────────── +// +// v1 unifies the v0 `--calibrate` + `--finalize` two-step into a single +// `--merge-reports` action. The calibrator role (cross-finding reconciliation, +// severity recalibration, missed-finding detection, remediation plan) is now +// performed by the main agent via the /devtrail-audit-review skill, which +// produces `.devtrail/audits//review.md` consolidated. The CLI's job +// here is mechanical: validate each report against the schema, build per- +// auditor summaries, and emit the YAML block (or merge into telemetry). + +fn run_merge_reports( project_root: &Path, devtrail_dir: &Path, audit_dir: &Path, @@ -334,72 +253,84 @@ fn run_finalize( ) -> Result<()> { println!( "{} {} ({})", - "Step 3/3:".cyan().bold(), - "FINALIZE".bold(), + "MERGE-REPORTS".cyan().bold(), + "audit cycle".bold(), charter.frontmatter.charter_id.dimmed() ); - let primary_path = audit_dir.join("auditor-primary.md"); - let secondary_path = audit_dir.join("auditor-secondary.md"); - let calibrator_path = audit_dir.join("calibrator-reconciler.md"); - - for (label, path) in [ - ("auditor-primary", &primary_path), - ("auditor-secondary", &secondary_path), - ("calibrator-reconciler", &calibrator_path), - ] { - if !path.exists() { - bail!( - "{} not found. {} must exist before --finalize. \ - Re-run --calibrate if the calibrator step is incomplete.", - path.display(), - label - ); + // Gather report-*.md files under the audit dir (one per auditor). + let mut report_paths: Vec = Vec::new(); + if audit_dir.exists() { + for entry in std::fs::read_dir(audit_dir) + .with_context(|| format!("Failed to read {}", audit_dir.display()))? + { + let entry = entry?; + let p = entry.path(); + if !p.is_file() { + continue; + } + let name = match p.file_name().and_then(|n| n.to_str()) { + Some(n) => n, + None => continue, + }; + if name.starts_with("report-") && name.ends_with(".md") { + report_paths.push(p); + } } } + report_paths.sort(); + + if report_paths.is_empty() { + bail!( + "No reports found in {}. Expected one or more files matching report-*.md \ + written by the /devtrail-audit-execute skill (or saved manually by the \ + operator). Run --prepare first if you have not generated the audit prompt.", + relative_path(project_root, audit_dir).display() + ); + } + + if report_paths.len() < 2 { + eprintln!( + "{} only one report found ({}). Cross-family heterogeneity is the \ + discovery mechanism for substantive findings — recommended minimum \ + is 2 auditors of different model families. Proceeding with the \ + single report you provided.", + "warn:".yellow().bold(), + relative_path(project_root, &report_paths[0]).display() + ); + } let schema = AuditOutputSchema::load(devtrail_dir)?; let mut auditor_summaries: Vec = Vec::new(); - for path in [&primary_path, &secondary_path] { - let raw = std::fs::read_to_string(path)?; - let fm = parse_frontmatter(&raw)?; + for path in &report_paths { + let raw = std::fs::read_to_string(path) + .with_context(|| format!("Failed to read {}", path.display()))?; + let fm = parse_frontmatter(&raw) + .with_context(|| format!("Failed to parse frontmatter in {}", path.display()))?; let issues = schema.validate(&fm, path); if !issues.is_empty() { - eprintln!("{} {} failed schema validation", "error:".red().bold(), path.display()); + eprintln!( + "{} {} failed schema validation", + "error:".red().bold(), + path.display() + ); for issue in &issues { eprintln!(" - {}", issue.message); } - bail!("auditor output failed schema validation"); + bail!("auditor report failed schema validation"); } let summary = AuditorSummary::from_frontmatter(&fm)?; println!( - " {} Validated {} ({} findings, prompt: {})", + " {} Validated {} ({} findings)", "✔".green().bold(), relative_path(project_root, path).display(), - summary.findings_total, - summary.prompt_used.dimmed() + summary.findings_total ); auditor_summaries.push(summary); } - let calibrator_raw = std::fs::read_to_string(&calibrator_path)?; - let calibrator_fm = parse_frontmatter(&calibrator_raw)?; - let issues = schema.validate(&calibrator_fm, &calibrator_path); - if !issues.is_empty() { - eprintln!("{} calibrator failed schema validation", "error:".red().bold()); - for issue in &issues { - eprintln!(" - {}", issue.message); - } - bail!("calibrator output failed schema validation"); - } - println!( - " {} Validated {}", - "✔".green().bold(), - relative_path(project_root, &calibrator_path).display() - ); - println!(); - println!(" {}", "Charter audit complete.".green().bold()); + println!(" {}", "Audit cycle merge complete.".green().bold()); println!(); if let Some(target) = merge_into { @@ -421,14 +352,6 @@ fn run_finalize( println!("{}", render_external_audit_yaml(&auditor_summaries, canonical_id)); println!(); } - println!( - " {}", - "Calibrator summary (copy to outcome.scope_change_notes if relevant):".dimmed() - ); - println!( - " {}", - relative_path(project_root, &calibrator_path).display().to_string().dimmed() - ); Ok(()) } @@ -512,8 +435,7 @@ struct AuditContext { ailog_paths: String, ailog_contents: String, schema_path: String, - auditor_primary_findings: String, - auditor_secondary_findings: String, + project_context: String, } fn build_audit_context( @@ -540,8 +462,11 @@ fn build_audit_context( ailog_paths, ailog_contents, schema_path: ".devtrail/schemas/audit-output.schema.v0.json".to_string(), - auditor_primary_findings: String::new(), - auditor_secondary_findings: String::new(), + // {{project_context}} is intentionally empty by default. Adopters + // who want to give auditors a project-stack hint can edit the + // template to substitute it manually, or a future release may + // derive it from CLAUDE.md / config.yml. + project_context: String::new(), }) } @@ -633,11 +558,7 @@ fn resolve_audit_template(template: &str, ctx: &AuditContext, audit_role: &str) ("{{ailog_contents}}", &ctx.ailog_contents), ("{{audit_role}}", audit_role), ("{{schema_path}}", &ctx.schema_path), - ("{{auditor_primary_findings}}", &ctx.auditor_primary_findings), - ( - "{{auditor_secondary_findings}}", - &ctx.auditor_secondary_findings, - ), + ("{{project_context}}", &ctx.project_context), ]; // Find all ranges so we can skip placeholder replacement @@ -704,6 +625,10 @@ struct AuditorSummary { findings_total: u64, findings_by_category: std::collections::BTreeMap, audit_quality: Option, + /// Read but currently unused outside the existing unit test that asserts + /// it parses correctly. Kept on the struct so the schema validation + /// covers it. + #[allow(dead_code)] prompt_used: String, } @@ -823,8 +748,8 @@ mod tests { ailog_paths: "(none)".into(), ailog_contents: "(none)".into(), schema_path: "s".into(), - auditor_primary_findings: String::new(), - auditor_secondary_findings: String::new(), + + project_context: String::new(), }; let out = resolve_audit_template(template, &ctx, "auditor-primary"); assert_eq!( @@ -847,8 +772,8 @@ mod tests { ailog_paths: "".into(), ailog_contents: "".into(), schema_path: "".into(), - auditor_primary_findings: String::new(), - auditor_secondary_findings: String::new(), + + project_context: String::new(), }; let out = resolve_audit_template(template, &ctx, "x"); assert_eq!(out, "CHARTER-01 -- {{unknown_token}}"); @@ -875,8 +800,8 @@ mod tests { ailog_paths: "(none)".into(), ailog_contents: "REAL_AILOGS".into(), schema_path: "s".into(), - auditor_primary_findings: String::new(), - auditor_secondary_findings: String::new(), + + project_context: String::new(), } } diff --git a/cli/src/main.rs b/cli/src/main.rs index 61c2105..f424c33 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -318,23 +318,36 @@ enum CharterCommands { Audit { /// Charter identifier (CHARTER-NN, CHARTER-NN-slug, or just NN) charter_id: String, - /// Git revision range (default: HEAD~1..HEAD) + /// Git revision range (default: origin/main..HEAD with fallback to + /// origin/master..HEAD; falls back to HEAD~1..HEAD with warning when + /// no upstream is reachable). Override with explicit value as needed. #[arg(long)] range: Option, - /// Step 2: read both auditor outputs from - /// audit/charters/CHARTER-NN/ and resolve the calibrator prompt. - #[arg(long, conflicts_with = "finalize")] + /// Generate the unified audit prompt and write it to + /// .devtrail/audits/CHARTER-NN/audit-prompt.md. Default action when + /// no other flag is passed. Equivalent to the v0 PREPARE step. + #[arg(long, conflicts_with_all = ["merge_reports", "calibrate", "finalize"])] + prepare: bool, + /// Read all report-*.md files in .devtrail/audits/CHARTER-NN/, + /// validate them against audit-output.schema.v0.json, and emit the + /// external_audit YAML block. Combine with --merge-into to append + /// the block directly into the Charter's telemetry YAML. + #[arg(long, conflicts_with_all = ["prepare", "calibrate", "finalize"])] + merge_reports: bool, + /// Deprecated v0 flag. The v1 flow does not have a separate calibrate + /// step — the calibrator role is handled by the main agent via the + /// /devtrail-audit-review skill. Emits a warning and exits. + #[arg(long, hide = true, conflicts_with_all = ["prepare", "merge_reports", "finalize"])] calibrate: bool, - /// Step 3: validate all 3 outputs against the schema and print the - /// external_audit YAML block ready to paste into the Charter - /// telemetry. - #[arg(long, conflicts_with = "calibrate")] + /// Deprecated v0 flag. Use --merge-reports instead. Emits a + /// deprecation warning and routes through the new path. + #[arg(long, hide = true, conflicts_with_all = ["prepare", "merge_reports", "calibrate"])] finalize: bool, - /// On --finalize only: append the external_audit array directly into - /// the Charter's telemetry YAML at the given path instead of printing - /// it to stdout. Re-audit (file already has external_audit) is - /// rejected with a clear error in v0. - #[arg(long, requires = "finalize")] + /// With --merge-reports (or deprecated --finalize): append the + /// external_audit array directly into the Charter's telemetry YAML + /// at the given path instead of printing it to stdout. Re-audit + /// (file already has external_audit) is rejected with a clear error. + #[arg(long)] merge_into: Option, /// Project directory (default: current directory) #[arg(long = "path", default_value = ".")] @@ -485,6 +498,8 @@ fn main() { CharterCommands::Audit { charter_id, range, + prepare, + merge_reports, calibrate, finalize, merge_into, @@ -493,6 +508,8 @@ fn main() { &path, &charter_id, range.as_deref(), + prepare, + merge_reports, calibrate, finalize, merge_into.as_deref(), diff --git a/cli/tests/charter_audit_test.rs b/cli/tests/charter_audit_test.rs index efe091d..4ed4eab 100644 --- a/cli/tests/charter_audit_test.rs +++ b/cli/tests/charter_audit_test.rs @@ -1,4 +1,4 @@ -//! Integration tests for `devtrail charter audit` (Phase 3 v0). +//! Integration tests for `devtrail charter audit` (v1 unified flow). use assert_cmd::Command; use predicates::prelude::*; @@ -6,14 +6,8 @@ use std::path::Path; use std::process::Command as StdCommand; use tempfile::TempDir; -const AUDIT_PROMPT_PRIMARY: &str = include_str!( - "../../dist/.devtrail/audit-prompts/auditor-primary.md" -); -const AUDIT_PROMPT_SECONDARY: &str = include_str!( - "../../dist/.devtrail/audit-prompts/auditor-secondary.md" -); -const AUDIT_PROMPT_CALIBRATOR: &str = include_str!( - "../../dist/.devtrail/audit-prompts/calibrator-reconciler.md" +const AUDIT_PROMPT_UNIFIED: &str = include_str!( + "../../dist/.devtrail/audit-prompts/audit-prompt.md" ); const AUDIT_OUTPUT_SCHEMA: &str = include_str!( "../../dist/.devtrail/schemas/audit-output.schema.v0.json" @@ -35,18 +29,8 @@ fn setup_devtrail(dir: &Path) { std::fs::create_dir_all(devtrail.join("templates")).unwrap(); std::fs::write(devtrail.join("config.yml"), "language: en\n").unwrap(); std::fs::write( - devtrail.join("audit-prompts/auditor-primary.md"), - AUDIT_PROMPT_PRIMARY, - ) - .unwrap(); - std::fs::write( - devtrail.join("audit-prompts/auditor-secondary.md"), - AUDIT_PROMPT_SECONDARY, - ) - .unwrap(); - std::fs::write( - devtrail.join("audit-prompts/calibrator-reconciler.md"), - AUDIT_PROMPT_CALIBRATOR, + devtrail.join("audit-prompts/audit-prompt.md"), + AUDIT_PROMPT_UNIFIED, ) .unwrap(); std::fs::write( @@ -56,6 +40,11 @@ fn setup_devtrail(dir: &Path) { .unwrap(); } +/// Helper: returns the v1 canonical audit dir for a Charter under `dir`. +fn audit_dir(dir: &Path, charter_id: &str) -> std::path::PathBuf { + dir.join(".devtrail").join("audits").join(charter_id) +} + fn write_charter(dir: &Path) { let charters = dir.join("docs/charters"); std::fs::create_dir_all(&charters).unwrap(); @@ -130,7 +119,7 @@ fn audit_unknown_charter_fails() { } #[test] -fn audit_prepare_writes_resolved_prompts() { +fn audit_prepare_writes_unified_prompt_to_canonical_location() { if !bash_available() { eprintln!("skipping: git not available"); return; @@ -147,31 +136,33 @@ fn audit_prepare_writes_resolved_prompts() { .assert() .success() .stdout(predicate::str::contains("PREPARE")) - .stdout(predicate::str::contains("auditor-primary.prompt.md")) - .stdout(predicate::str::contains("auditor-secondary.prompt.md")) - .stdout(predicate::str::contains("--calibrate")); + .stdout(predicate::str::contains("audit-prompt.md")) + .stdout(predicate::str::contains("/devtrail-audit-execute")) + .stdout(predicate::str::contains("/devtrail-audit-review")); + + // v1 canonical path: .devtrail/audits/CHARTER-01/audit-prompt.md + // (singular file, not a prompts/ subdirectory with two files). + let resolved_path = audit_dir(dir.path(), "CHARTER-01").join("audit-prompt.md"); + let resolved = std::fs::read_to_string(&resolved_path) + .unwrap_or_else(|_| panic!("expected resolved prompt at {}", resolved_path.display())); - let prompts = dir.path().join("audit/charters/CHARTER-01/prompts"); - let primary = std::fs::read_to_string(prompts.join("auditor-primary.prompt.md")).unwrap(); // Placeholder substitution happened in the body (outside HTML comments). - assert!(primary.contains("CHARTER-01")); - assert!(primary.contains("auditor-primary")); - assert!(primary.contains("docs/charters/01-audit-test.md")); + assert!(resolved.contains("CHARTER-01")); + assert!(resolved.contains("docs/charters/01-audit-test.md")); // Diff was inlined. - assert!(primary.contains("// edited") || primary.contains("// initial")); + assert!(resolved.contains("// edited") || resolved.contains("// initial")); // R10 (issue #102): the resolver must NOT expand placeholders inside - // blocks. The auditor-primary.md template has a - // documentation header that lists placeholders literally; before the - // fix, those expanded and duplicated ~30k tokens of payload. After the - // fix, the header stays as documentation and each placeholder value - // appears in the body proper exactly once. - let header_end = primary.find("-->").expect("template should have an HTML comment header"); - let header = &primary[..header_end]; - let body = &primary[header_end..]; + // blocks. The unified template has a documentation header + // that lists placeholders literally; the resolver preserves them. + let header_end = resolved + .find("-->") + .expect("template should have an HTML comment header"); + let header = &resolved[..header_end]; + let body = &resolved[header_end..]; assert!( header.contains("{{charter_id}}"), - "documentation header must preserve {{{{charter_id}}}} literal: header={header:?}" + "documentation header must preserve {{{{charter_id}}}} literal" ); assert!( header.contains("{{git_diff}}"), @@ -186,12 +177,35 @@ fn audit_prepare_writes_resolved_prompts() { "body (outside comment) must have {{{{git_diff}}}} replaced" ); - let secondary = std::fs::read_to_string(prompts.join("auditor-secondary.prompt.md")).unwrap(); - assert!(secondary.contains("auditor-secondary")); + // v1: the v0 paths under audit/charters/ must NOT be written by the + // v1 CLI. Only the canonical .devtrail/audits//audit-prompt.md is + // produced. + let v0_primary = dir + .path() + .join("audit") + .join("charters") + .join("CHARTER-01") + .join("prompts") + .join("auditor-primary.prompt.md"); + assert!( + !v0_primary.exists(), + "v0 path under audit/charters/ must NOT be written by the v1 CLI" + ); + let v0_secondary = dir + .path() + .join("audit") + .join("charters") + .join("CHARTER-01") + .join("prompts") + .join("auditor-secondary.prompt.md"); + assert!( + !v0_secondary.exists(), + "v0 secondary prompt path must NOT be written by the v1 CLI" + ); } #[test] -fn audit_calibrate_requires_auditor_outputs() { +fn audit_merge_reports_with_no_reports_fails_helpfully() { if !bash_available() { return; } @@ -200,19 +214,19 @@ fn audit_calibrate_requires_auditor_outputs() { write_charter(dir.path()); init_repo_with_diff(dir.path()); - // Skip prepare; go directly to calibrate. + // Skip prepare and any report files; jump straight to merge-reports. Command::cargo_bin("devtrail") .unwrap() - .args(["charter", "audit", "CHARTER-01", "--calibrate", "--path"]) + .args(["charter", "audit", "CHARTER-01", "--merge-reports", "--path"]) .arg(dir.path().to_str().unwrap()) .assert() .failure() - .stderr(predicate::str::contains("auditor-primary.md")) - .stderr(predicate::str::contains("not found")); + .stderr(predicate::str::contains("No reports found")) + .stderr(predicate::str::contains("report-*.md")); } #[test] -fn audit_calibrate_validates_outputs_against_schema() { +fn audit_merge_reports_validates_against_schema() { if !bash_available() { return; } @@ -221,48 +235,28 @@ fn audit_calibrate_validates_outputs_against_schema() { write_charter(dir.path()); init_repo_with_diff(dir.path()); - let audit_dir = dir.path().join("audit/charters/CHARTER-01"); - std::fs::create_dir_all(&audit_dir).unwrap(); + let canonical = audit_dir(dir.path(), "CHARTER-01"); + std::fs::create_dir_all(&canonical).unwrap(); - // Write a malformed auditor-primary.md (missing required findings_total). + // Malformed report (missing required findings_total). std::fs::write( - audit_dir.join("auditor-primary.md"), + canonical.join("report-claude-sonnet-4-6.md"), r#"--- -audit_role: auditor-primary -auditor: copilot +audit_role: auditor +auditor: claude-sonnet-4-6 charter_id: CHARTER-01 audited_at: "2026-05-03" -prompt_used: prompts/auditor-primary.prompt.md +prompt_used: audit-prompt.md --- # bad -"#, - ) - .unwrap(); - std::fs::write( - audit_dir.join("auditor-secondary.md"), - r#"--- -audit_role: auditor-secondary -auditor: gemini -charter_id: CHARTER-01 -audited_at: "2026-05-03" -findings_total: 0 -findings_by_category: - hallucination: 0 - implementation_gap: 0 - real_debt: 0 - false_positive: 0 -prompt_used: prompts/auditor-secondary.prompt.md ---- - -# good "#, ) .unwrap(); Command::cargo_bin("devtrail") .unwrap() - .args(["charter", "audit", "CHARTER-01", "--calibrate", "--path"]) + .args(["charter", "audit", "CHARTER-01", "--merge-reports", "--path"]) .arg(dir.path().to_str().unwrap()) .assert() .failure() @@ -270,7 +264,7 @@ prompt_used: prompts/auditor-secondary.prompt.md } #[test] -fn audit_full_three_step_cycle_succeeds() { +fn audit_merge_reports_handles_n_reports_with_unified_role() { if !bash_available() { return; } @@ -279,149 +273,192 @@ fn audit_full_three_step_cycle_succeeds() { write_charter(dir.path()); init_repo_with_diff(dir.path()); - // Step 1: prepare. + // Prepare (writes the unified prompt). Command::cargo_bin("devtrail") .unwrap() - .args(["charter", "audit", "CHARTER-01", "--path"]) + .args(["charter", "audit", "CHARTER-01", "--prepare", "--path"]) .arg(dir.path().to_str().unwrap()) .assert() .success(); - // Simulate the operator pasting valid auditor responses. - let audit_dir = dir.path().join("audit/charters/CHARTER-01"); - std::fs::write( - audit_dir.join("auditor-primary.md"), - r#"--- -audit_role: auditor-primary -auditor: copilot-v1.0.37 + // Operator saves three reports under the canonical path with the v1 + // unified `audit_role: auditor`. + let canonical = audit_dir(dir.path(), "CHARTER-01"); + for (slug, model, findings) in [ + ("claude-sonnet-4-6", "claude-sonnet-4-6", 2), + ("gemini-2-5-pro", "gemini-2.5-pro", 1), + ("gpt-5-3-codex", "gpt-5.3-codex", 0), + ] { + let body = format!( + r#"--- +audit_role: auditor +auditor: {model} charter_id: CHARTER-01 git_range: "HEAD~1..HEAD" -prompt_used: prompts/auditor-primary.prompt.md +prompt_used: audit-prompt.md audited_at: "2026-05-03" -findings_total: 2 +findings_total: {findings} findings_by_category: hallucination: 0 - implementation_gap: 1 - real_debt: 1 + implementation_gap: {findings} + real_debt: 0 false_positive: 0 audit_quality: high +evidence_citations: {findings} --- +# Body +"#, + model = model, + findings = findings + ); + std::fs::write(canonical.join(format!("report-{slug}.md")), body).unwrap(); + } -# Audit by copilot - -## Findings - -### F1 — minor gap — implementation_gap - -Body. + Command::cargo_bin("devtrail") + .unwrap() + .args(["charter", "audit", "CHARTER-01", "--merge-reports", "--path"]) + .arg(dir.path().to_str().unwrap()) + .assert() + .success() + .stdout(predicate::str::contains("Audit cycle merge complete")) + .stdout(predicate::str::contains("external_audit YAML")) + .stdout(predicate::str::contains("claude-sonnet-4-6")) + .stdout(predicate::str::contains("gemini-2.5-pro")) + .stdout(predicate::str::contains("gpt-5.3-codex")) + // No "warning: only one report" because we have three. + .stderr(predicate::str::contains("only one report").not()); +} -### F2 — leak — real_debt +#[test] +fn audit_merge_reports_warns_on_single_report_but_proceeds() { + if !bash_available() { + return; + } + let dir = TempDir::new().unwrap(); + setup_devtrail(dir.path()); + write_charter(dir.path()); + init_repo_with_diff(dir.path()); -Body. -"#, - ) - .unwrap(); + let canonical = audit_dir(dir.path(), "CHARTER-01"); + std::fs::create_dir_all(&canonical).unwrap(); std::fs::write( - audit_dir.join("auditor-secondary.md"), + canonical.join("report-claude-sonnet-4-6.md"), r#"--- -audit_role: auditor-secondary -auditor: gemini-cli-v1.5 +audit_role: auditor +auditor: claude-sonnet-4-6 charter_id: CHARTER-01 git_range: "HEAD~1..HEAD" -prompt_used: prompts/auditor-secondary.prompt.md +prompt_used: audit-prompt.md audited_at: "2026-05-03" -findings_total: 1 +findings_total: 0 findings_by_category: hallucination: 0 - implementation_gap: 1 + implementation_gap: 0 real_debt: 0 false_positive: 0 -audit_quality: medium --- - -# Audit by gemini - -## Findings - -### F1 — overlapping gap — implementation_gap - -Body. +# Body "#, ) .unwrap(); - // Step 2: calibrate. Command::cargo_bin("devtrail") .unwrap() - .args(["charter", "audit", "CHARTER-01", "--calibrate", "--path"]) + .args(["charter", "audit", "CHARTER-01", "--merge-reports", "--path"]) .arg(dir.path().to_str().unwrap()) .assert() .success() - .stdout(predicate::str::contains("CALIBRATE")) - .stdout(predicate::str::contains("calibrator-reconciler.prompt.md")) - .stdout(predicate::str::contains("--finalize")); + .stderr(predicate::str::contains("only one report")) + .stderr(predicate::str::contains("heterogeneity")); +} - // The resolved calibrator prompt should embed both auditors' findings. - let cal = std::fs::read_to_string( - audit_dir.join("prompts/calibrator-reconciler.prompt.md"), - ) - .unwrap(); - assert!(cal.contains("calibrator-reconciler")); - assert!(cal.contains("copilot-v1.0.37"), "primary auditor body should be embedded"); - assert!(cal.contains("gemini-cli-v1.5"), "secondary auditor body should be embedded"); +#[test] +fn audit_deprecated_calibrate_emits_warning_and_exits() { + if !bash_available() { + return; + } + let dir = TempDir::new().unwrap(); + setup_devtrail(dir.path()); + write_charter(dir.path()); + init_repo_with_diff(dir.path()); + + Command::cargo_bin("devtrail") + .unwrap() + .args(["charter", "audit", "CHARTER-01", "--calibrate", "--path"]) + .arg(dir.path().to_str().unwrap()) + .assert() + .failure() + .stderr(predicate::str::contains("v0 way")) + .stderr(predicate::str::contains("/devtrail-audit-review")) + .stderr(predicate::str::contains("--merge-reports")); +} + +#[test] +fn audit_deprecated_finalize_redirects_to_merge_reports() { + if !bash_available() { + return; + } + let dir = TempDir::new().unwrap(); + setup_devtrail(dir.path()); + write_charter(dir.path()); + init_repo_with_diff(dir.path()); - // Simulate calibrator response. + let canonical = audit_dir(dir.path(), "CHARTER-01"); + std::fs::create_dir_all(&canonical).unwrap(); std::fs::write( - audit_dir.join("calibrator-reconciler.md"), + canonical.join("report-claude-sonnet-4-6.md"), r#"--- -audit_role: calibrator-reconciler -calibrator: claude-opus-4 +audit_role: auditor +auditor: claude-sonnet-4-6 charter_id: CHARTER-01 git_range: "HEAD~1..HEAD" -prompt_used: prompts/calibrator-reconciler.prompt.md -calibrated_at: "2026-05-03" -auditors_reconciled: - - auditor-primary.md - - auditor-secondary.md -findings_consolidated: 2 -findings_by_status: - agreed: 1 - disputed: 0 - unique_primary: 1 - unique_secondary: 0 - rejected: 0 +prompt_used: audit-prompt.md +audited_at: "2026-05-03" +findings_total: 0 +findings_by_category: + hallucination: 0 + implementation_gap: 0 + real_debt: 0 + false_positive: 0 --- - -# Calibration - -## Reconciliation summary - -Both auditors converged on the implementation_gap; primary added a real_debt -that secondary missed. +# Body "#, ) .unwrap(); - // Step 3: finalize. + // --finalize should warn but proceed via the merge-reports path. Command::cargo_bin("devtrail") .unwrap() .args(["charter", "audit", "CHARTER-01", "--finalize", "--path"]) .arg(dir.path().to_str().unwrap()) .assert() .success() - .stdout(predicate::str::contains("FINALIZE")) - .stdout(predicate::str::contains("Charter audit complete")) - .stdout(predicate::str::contains("external_audit YAML")) - // Both auditors appear in the rendered YAML. - .stdout(predicate::str::contains("copilot-v1.0.37")) - .stdout(predicate::str::contains("gemini-cli-v1.5")); + .stderr(predicate::str::contains("--finalize is the v0 name")) + .stderr(predicate::str::contains("--merge-reports")) + .stdout(predicate::str::contains("Audit cycle merge complete")); } #[test] -fn audit_calibrate_and_finalize_are_mutually_exclusive() { +fn audit_action_flags_are_mutually_exclusive() { let dir = TempDir::new().unwrap(); setup_devtrail(dir.path()); + // --prepare and --merge-reports must not co-occur (clap-enforced). + Command::cargo_bin("devtrail") + .unwrap() + .args([ + "charter", + "audit", + "CHARTER-01", + "--prepare", + "--merge-reports", + "--path", + ]) + .arg(dir.path().to_str().unwrap()) + .assert() + .failure(); + + // Deprecated flags also conflict with each other and with the new ones. Command::cargo_bin("devtrail") .unwrap() .args([ @@ -437,32 +474,32 @@ fn audit_calibrate_and_finalize_are_mutually_exclusive() { .failure(); } -// ── --merge-into: PR 2 of audit-skills rollout ───────────────────────────── +// ── --merge-into: PR 2 of audit-skills rollout (updated for v1 paths) ───── -/// Set up a Charter that has been fully audited (3 outputs present), so we -/// can drive --finalize repeatedly with different --merge-into targets. +/// Set up a Charter with two valid v1 reports under .devtrail/audits/CHARTER-01/, +/// so we can drive --merge-reports + --merge-into repeatedly. fn setup_finalized_audit(dir: &Path) { setup_devtrail(dir); write_charter(dir); init_repo_with_diff(dir); - // PREPARE + // PREPARE writes the unified audit-prompt.md to the canonical path. Command::cargo_bin("devtrail") .unwrap() - .args(["charter", "audit", "CHARTER-01", "--path"]) + .args(["charter", "audit", "CHARTER-01", "--prepare", "--path"]) .arg(dir.to_str().unwrap()) .assert() .success(); - let audit_dir = dir.join("audit/charters/CHARTER-01"); + let canonical = audit_dir(dir, "CHARTER-01"); std::fs::write( - audit_dir.join("auditor-primary.md"), + canonical.join("report-copilot-v1-0-37.md"), r#"--- -audit_role: auditor-primary +audit_role: auditor auditor: copilot-v1.0.37 charter_id: CHARTER-01 git_range: "HEAD~1..HEAD" -prompt_used: prompts/auditor-primary.prompt.md +prompt_used: audit-prompt.md audited_at: "2026-05-03" findings_total: 2 findings_by_category: @@ -477,13 +514,13 @@ audit_quality: high ) .unwrap(); std::fs::write( - audit_dir.join("auditor-secondary.md"), + canonical.join("report-gemini-cli-v1-5.md"), r#"--- -audit_role: auditor-secondary +audit_role: auditor auditor: gemini-cli-v1.5 charter_id: CHARTER-01 git_range: "HEAD~1..HEAD" -prompt_used: prompts/auditor-secondary.prompt.md +prompt_used: audit-prompt.md audited_at: "2026-05-03" findings_total: 1 findings_by_category: @@ -494,33 +531,6 @@ findings_by_category: audit_quality: medium --- # Body -"#, - ) - .unwrap(); - - // CALIBRATE (the CLI writes calibrator-reconciler.prompt.md but here we - // just simulate the operator pasting the calibrator response directly). - std::fs::write( - audit_dir.join("calibrator-reconciler.md"), - r#"--- -audit_role: calibrator-reconciler -calibrator: claude-opus-4 -charter_id: CHARTER-01 -git_range: "HEAD~1..HEAD" -prompt_used: prompts/calibrator-reconciler.prompt.md -calibrated_at: "2026-05-03" -auditors_reconciled: - - auditor-primary.md - - auditor-secondary.md -findings_consolidated: 2 -findings_by_status: - agreed: 1 - disputed: 0 - unique_primary: 1 - unique_secondary: 0 - rejected: 0 ---- -# Body "#, ) .unwrap(); @@ -565,7 +575,7 @@ fn audit_merge_into_appends_external_audit_to_telemetry() { "charter", "audit", "CHARTER-01", - "--finalize", + "--merge-reports", "--merge-into", telemetry_path.to_str().unwrap(), "--path", @@ -582,14 +592,14 @@ fn audit_merge_into_appends_external_audit_to_telemetry() { ); assert!( merged.contains(" - auditor: \"copilot-v1.0.37\""), - "primary auditor present in merged output" + "first auditor present in merged output" ); assert!( merged.contains(" - auditor: \"gemini-cli-v1.5\""), - "secondary auditor present in merged output" + "second auditor present in merged output" ); assert!( - merged.contains("audit/charters/CHARTER-01/auditor-primary.md"), + merged.contains("audit/charters/CHARTER-01/"), "audit_notes must reference real charter id (not placeholder)" ); // Pre-existing keys preserved. @@ -612,7 +622,7 @@ fn audit_merge_into_missing_telemetry_fails_with_helpful_message() { "charter", "audit", "CHARTER-01", - "--finalize", + "--merge-reports", "--merge-into", missing.to_str().unwrap(), "--path", @@ -644,7 +654,7 @@ fn audit_merge_into_rejects_existing_external_audit() { "charter", "audit", "CHARTER-01", - "--finalize", + "--merge-reports", "--merge-into", telemetry_path.to_str().unwrap(), "--path", @@ -656,11 +666,12 @@ fn audit_merge_into_rejects_existing_external_audit() { } #[test] -fn audit_merge_into_requires_finalize() { +fn audit_merge_into_requires_merge_reports_or_finalize() { let dir = TempDir::new().unwrap(); setup_devtrail(dir.path()); - // Without --finalize, clap should reject --merge-into. + // Without --merge-reports (or deprecated --finalize), the CLI should + // reject --merge-into with a clear error. Command::cargo_bin("devtrail") .unwrap() .args([ @@ -673,7 +684,8 @@ fn audit_merge_into_requires_finalize() { ]) .arg(dir.path().to_str().unwrap()) .assert() - .failure(); + .failure() + .stderr(predicate::str::contains("--merge-into is only valid with --merge-reports")); } // ── R11(A) regression tests (issue #102) ─────────────────────────────────── @@ -742,7 +754,7 @@ fn audit_default_range_uses_origin_main_when_available() { let prompt = std::fs::read_to_string( dir.path() - .join("audit/charters/CHARTER-01/prompts/auditor-primary.prompt.md"), + .join(".devtrail/audits/CHARTER-01/audit-prompt.md"), ) .unwrap(); assert!( @@ -779,7 +791,7 @@ fn audit_default_range_falls_back_to_head_minus_one_without_remote() { let prompt = std::fs::read_to_string( dir.path() - .join("audit/charters/CHARTER-01/prompts/auditor-primary.prompt.md"), + .join(".devtrail/audits/CHARTER-01/audit-prompt.md"), ) .unwrap(); assert!( diff --git a/dist/.devtrail/audit-prompts/auditor-primary.md b/dist/.devtrail/audit-prompts/auditor-primary.md deleted file mode 100644 index e873c00..0000000 --- a/dist/.devtrail/audit-prompts/auditor-primary.md +++ /dev/null @@ -1,154 +0,0 @@ - - -You are an external auditor reviewing the execution of a DevTrail Charter. -Your job is to compare what the Charter declared (ex-ante) against what the -commits actually changed (ex-post) and produce a categorized list of findings. - -You are the **{{audit_role}}** auditor in a dual-audit cycle. Another -auditor of a different model family is being given the same Charter and diff -in parallel. A calibrator-reconciler will later compare your findings against -theirs. Cross-model heterogeneity is the point — your distribution of -training and your blind spots differ from the other auditor's, and that is -what makes the convergence (or disagreement) signal valuable. - -# What you are auditing - -**Charter:** `{{charter_path}}` (`{{charter_id}}` — {{charter_title}}) - -**Git range:** `{{git_range}}` - -**Originating AILOGs** (rationale + emergent risks documented during execution): - -``` -{{ailog_paths}} -``` - -# Charter content - -```markdown -{{charter_content}} -``` - -# AILOG content - -```markdown -{{ailog_contents}} -``` - -# Diff - -```diff -{{git_diff}} -``` - -# What I need from you - -Produce a markdown file with this exact frontmatter shape (validates against -`{{schema_path}}`): - -```yaml ---- -audit_role: auditor-primary -auditor: # e.g., copilot-v1.0.37 -charter_id: {{charter_id}} -git_range: "{{git_range}}" -prompt_used: prompts/auditor-primary.prompt.md -audited_at: -findings_total: -findings_by_category: - hallucination: - implementation_gap: - real_debt: - false_positive: ---- - -# Audit: {{charter_id}} by - -## Summary - -[1-2 paragraphs: did the execution match the Charter's declared scope? What -is the overall verdict — clean, partial, deviated?] - -## Findings - -### F1 — - -**Where:** `` or `` if span-wide. - -**What I observed:** [Concrete description of the gap, hallucination, or -real debt. Cite specific lines from the diff or the AILOGs.] - -**Why I'm flagging it:** [Reasoning. What about the Charter's declaration vs -the diff makes this a finding?] - -### F2 — ... - -[Continue numbering F1...FN. One section per finding.] -``` - -# Categorization rules - -Apply the following categories. The calibrator will use the same definitions: - -- **`hallucination`** — the Charter or implementation references something - that does not exist (an API, a function, a field name, a behavior). The - agent invented it. Verify by reading the diff or the cited file. -- **`implementation_gap`** — the Charter declared work that the diff did - not deliver, OR the diff delivered work the Charter did not declare, - WITHOUT it being documented as drift in the AILOG. (If documented in - AILOG under `## Risk` as `R`, that is *not* a gap; the AILOG-aware - drift check already accepts it.) -- **`real_debt`** — code-level concern that is correct as far as the - Charter goes but introduces technical debt or a subtle defect (a missing - error path, a leaky resource, a non-idempotent operation). Adopter is - expected to capture as `TDE` doc post-audit. -- **`false_positive`** — what initially looked like a finding but, on - closer inspection of the AILOGs or the diff context, isn't one. - Document anyway; the calibrator uses these to recognize patterns where - one auditor over-reports. - -# Discipline - -- Cite specific file paths and line numbers from the diff. Do not summarize - abstractly. -- If you cannot find anything substantive, return `findings_total: 0` with - a single `## Summary` paragraph explaining what you reviewed. Empty audits - are valid signal — the calibrator will note convergence with the other - auditor's empty audit, if applicable. -- Do not fabricate findings to seem thorough. The categorization rules - above include `false_positive` precisely because over-reporting is a - real audit failure mode. -- Do not consult external sources beyond what is provided in this prompt. - The audit must be reproducible from the prompt + the diff + the AILOGs - alone. diff --git a/dist/.devtrail/audit-prompts/auditor-secondary.md b/dist/.devtrail/audit-prompts/auditor-secondary.md deleted file mode 100644 index a2651fd..0000000 --- a/dist/.devtrail/audit-prompts/auditor-secondary.md +++ /dev/null @@ -1,131 +0,0 @@ - - -You are an independent external auditor reviewing the execution of a -DevTrail Charter. You are the **{{audit_role}}** auditor. A primary auditor -of a different model family is reviewing the same Charter and diff in -parallel. The two of you may agree or disagree; both are valuable signal. -A calibrator-reconciler will integrate your findings with the primary's. - -You may have been trained on different data than the primary. Your blind -spots and your priors are different. Audit independently — the value of the -dual-audit comes from convergence on real findings and divergence on -boundary cases, not from echoing the primary auditor. - -# What you are auditing - -**Charter:** `{{charter_path}}` (`{{charter_id}}` — {{charter_title}}) - -**Git range:** `{{git_range}}` - -**Originating AILOGs** (rationale + emergent risks documented during execution): - -``` -{{ailog_paths}} -``` - -# Charter content - -```markdown -{{charter_content}} -``` - -# AILOG content - -```markdown -{{ailog_contents}} -``` - -# Diff - -```diff -{{git_diff}} -``` - -# What I need from you - -Produce a markdown file with this exact frontmatter shape (validates against -`{{schema_path}}`): - -```yaml ---- -audit_role: auditor-secondary -auditor: # e.g., gemini-cli-v1.5 -charter_id: {{charter_id}} -git_range: "{{git_range}}" -prompt_used: prompts/auditor-secondary.prompt.md -audited_at: -findings_total: -findings_by_category: - hallucination: - implementation_gap: - real_debt: - false_positive: ---- - -# Audit: {{charter_id}} by - -## Summary - -[1-2 paragraphs: did the execution match the Charter's declared scope? -What is the overall verdict?] - -## Findings - -### F1 — - -**Where:** `` or `` if span-wide. - -**What I observed:** [Concrete description. Cite specific lines from the -diff or the AILOGs.] - -**Why I'm flagging it:** [Reasoning. What about the Charter's declaration -vs the diff makes this a finding?] - -### F2 — ... - -[One section per finding.] -``` - -# Categorization rules - -Same categories as the primary auditor — the calibrator uses the same -definitions to compare your findings: - -- **`hallucination`** — Charter or implementation references something - that does not exist (invented API, function, field, behavior). Verify - by reading the diff or cited file. -- **`implementation_gap`** — Charter declared work the diff did not - deliver (or vice versa) WITHOUT it being documented as drift in the - AILOG. (Documented in AILOG `## Risk` as `R` is *not* a gap.) -- **`real_debt`** — code-level concern not strictly within Charter - scope but introducing debt or a subtle defect (missing error path, - leaky resource, non-idempotent operation). Adopter captures as `TDE`. -- **`false_positive`** — looked like a finding but, on closer reading - of the AILOGs or diff context, isn't. Document anyway; calibrator - uses these to detect over-reporting patterns. - -# Discipline - -- Cite specific file paths and line numbers from the diff. No abstract - summaries. -- If you find nothing substantive, return `findings_total: 0` with a - `## Summary` paragraph explaining your review. Empty is valid signal. -- Do not fabricate findings to seem thorough. Over-reporting is a real - audit failure mode — `false_positive` exists precisely for this case. -- Do not consult external sources beyond this prompt. The audit must be - reproducible from the prompt + diff + AILOGs alone. diff --git a/dist/.devtrail/audit-prompts/calibrator-reconciler.md b/dist/.devtrail/audit-prompts/calibrator-reconciler.md deleted file mode 100644 index 1dd4763..0000000 --- a/dist/.devtrail/audit-prompts/calibrator-reconciler.md +++ /dev/null @@ -1,173 +0,0 @@ - - -You are the **calibrator-reconciler** of a DevTrail dual-audit cycle. Two -external auditors of different model families have already reviewed the -Charter; their outputs are below. Your job is to apply the categorization -schema definitionally, recognize agreement and disagreement, and produce a -consolidated list of findings that the Charter's telemetry can record. - -You are not auditing fresh. You are reading two audits and reconciling them. - -# What you are reconciling - -**Charter:** `{{charter_path}}` (`{{charter_id}}` — {{charter_title}}) - -**Git range:** `{{git_range}}` - -# Charter content - -```markdown -{{charter_content}} -``` - -# Originating AILOGs - -``` -{{ailog_paths}} -``` - -```markdown -{{ailog_contents}} -``` - -# Auditor PRIMARY output - -```markdown -{{auditor_primary_findings}} -``` - -# Auditor SECONDARY output - -```markdown -{{auditor_secondary_findings}} -``` - -# What I need from you - -Produce a markdown file with this exact frontmatter shape (validates against -`{{schema_path}}`): - -```yaml ---- -audit_role: calibrator-reconciler -calibrator: # e.g., claude-opus-4 -charter_id: {{charter_id}} -git_range: "{{git_range}}" -prompt_used: prompts/calibrator-reconciler.prompt.md -calibrated_at: -auditors_reconciled: - - auditor-primary.md - - auditor-secondary.md -findings_consolidated: -findings_by_status: - agreed: # both auditors flagged the same finding - disputed: # both flagged but disagreed on category — you picked - unique_primary: # only primary; you validated as legitimate - unique_secondary: # only secondary; you validated - rejected: # both flagged but you determined false positive ---- - -# Calibration: {{charter_id}} - -## Reconciliation summary - -[1-2 paragraphs: how convergent were the auditors? Where did they -disagree, and on what kind of finding? Did one auditor have a higher -false-positive rate?] - -## Reconciled findings - -### C1 — - -**Status:** agreed | disputed | unique_primary | unique_secondary | rejected. - -**Where:** ``. - -**What was observed:** [Combine the auditors' descriptions. If they -disagreed, note both views and your resolution.] - -**Calibration rationale:** [Why this status. If `agreed`, name what each -auditor said. If `disputed`, name the disagreement and your call. If -`unique_*`, explain why you validated. If `rejected`, explain why both -auditors were wrong.] - -### C2 — ... - -[One section per consolidated finding. Numbering C1...CN is independent -of the F1...FN numbering each auditor used; cross-reference auditor -numbering inside each section as needed.] -``` - -# Categorization rules (same as the auditors) - -- **`hallucination`** — invented API, function, field, behavior. -- **`implementation_gap`** — declared but not delivered (or vice versa) - WITHOUT being documented in AILOG as drift. -- **`real_debt`** — code-level debt or subtle defect outside Charter scope. -- **`false_positive`** — appeared to be a finding but isn't. - -# Status assignment rules - -For each distinct finding (deduplicate when both auditors describe the -same gap with different wording): - -- `agreed` — both auditors flagged it AND assigned the same category. - Strongest signal — the convergence between heterogeneous auditors is - what makes a dual-audit valuable. -- `disputed` — both auditors flagged it BUT assigned different categories - (e.g., primary calls it `implementation_gap`, secondary calls it - `hallucination`). You pick the category that fits the schema definitions - best, given the diff and the AILOGs. -- `unique_primary` / `unique_secondary` — only one auditor flagged it, - AND on your reading, they were correct to flag it. -- `rejected` — one or both auditors flagged it, but on closer reading - of the AILOGs (especially `## Risk` `R` documented mitigations) - or the diff, it isn't a finding. Both `unique` flags can become - `rejected` if the unique auditor was wrong. - -# Discipline - -- Use the `findings_by_status` counts as a cross-check against your - body sections. They must add up to `findings_consolidated`. -- Do not introduce findings the auditors did not see. If you spot - something they missed, document it in `## Reconciliation summary` as - an observation, not as a `C` finding. Fresh findings are out of - scope for the calibrator role — that's what the next audit cycle is for. -- The `rejected` count is signal worth tracking — it tells the Charter - author which audit categories tend to over-report on this kind of - Charter, which improves future audit prompt design. -- Do not consult external sources beyond what is provided. The - reconciliation must be reproducible from the prompt + the two auditor - outputs + the Charter + the AILOGs.