From 12fd03b07fa25759ff75a1e424bee288f5dd4f29 Mon Sep 17 00:00:00 2001 From: David Cramer Date: Wed, 18 Feb 2026 03:07:14 -0800 Subject: [PATCH] feat(telemetry): Add global scope attributes, run metric, and trace ID surfacing Replace per-metric source/repository threading with global Sentry scope attributes that propagate to all metrics automatically. Add workflow.runs counter for "total unique runs" queries. Surface trace ID in CLI summary (Verbose+), debug output, structured logs, and JSONL metadata so operators can correlate runs to Sentry traces. Co-Authored-By: Claude Opus 4.6 --- specs/reporters.md | 10 +++--- specs/telemetry.md | 48 ++++++++++++++++++++++------ src/action/workflow/pr-workflow.ts | 11 +++++-- src/cli/main.ts | 22 ++++++++++--- src/cli/output/jsonl.ts | 5 ++- src/cli/output/reporter.ts | 8 ++++- src/cli/output/tasks.ts | 4 +-- src/sentry.ts | 51 ++++++++++++++++++++++-------- 8 files changed, 122 insertions(+), 37 deletions(-) diff --git a/specs/reporters.md b/specs/reporters.md index 16d50f1..bebd87e 100644 --- a/specs/reporters.md +++ b/specs/reporters.md @@ -407,7 +407,7 @@ At `Verbose+`. #### 19. `analysis_complete` -Fields: `findingsCount`, `failedHunks?`, `failedExtractions?`, `skippedFiles?`, `usage?`, `totalDuration` +Fields: `findingsCount`, `failedHunks?`, `failedExtractions?`, `skippedFiles?`, `usage?`, `totalDuration`, `traceId?` This is a multi-line summary section, not a single event. @@ -420,8 +420,9 @@ SUMMARY Use -v for failure details 3 files skipped Analysis completed in 4.2s · 5.0k in / 800 out · $0.01 +Trace: abc123def456... ``` -(Counts colored by severity. Warnings in yellow. Hint dimmed. Usage line dimmed.) +(Counts colored by severity. Warnings in yellow. Hint dimmed. Usage line dimmed. Trace ID dimmed, `Verbose+` only, only when Sentry is initialized.) The `-v` hint appears only when there are failures (`failedHunks > 0` or `failedExtractions > 0`) **and** verbosity is below `Verbose`. 
At `Verbose+`, per-hunk details are already shown via events 16-18 so the hint is suppressed. @@ -434,8 +435,9 @@ The `-v` hint appears only when there are failures (`failedHunks > 0` or `failed [2026-02-08T14:30:56.000Z] warden: 3 files skipped [2026-02-08T14:30:56.000Z] warden: Usage: 5.0k input, 800 output, $0.01 [2026-02-08T14:30:56.000Z] warden: Total time: 4.2s +[2026-02-08T14:30:56.000Z] warden: Trace: abc123def456... ``` -Warnings and skipped files only shown when non-zero. The `-v` hint follows the same gating as TTY. +Warnings and skipped files only shown when non-zero. The `-v` hint follows the same gating as TTY. Trace ID shown at `Verbose+` only, when Sentry is initialized. **JSONL:** Summary record (the last line in the JSONL file) @@ -555,7 +557,7 @@ Each line in a JSONL file is one of three record types, discriminated by the pre ### Shared Types -**RunMetadata**: `{ timestamp: string, durationMs: number, cwd: string }` +**RunMetadata**: `{ timestamp: string, durationMs: number, cwd: string, traceId?: string }` **UsageStats**: `{ inputTokens: int, outputTokens: int, cacheReadInputTokens?: int, cacheCreationInputTokens?: int, costUSD: number }` -- `inputTokens` is the total input token count; `cacheReadInputTokens` and `cacheCreationInputTokens` are subsets of it. diff --git a/specs/telemetry.md b/specs/telemetry.md index a84bc88..e2dbdfb 100644 --- a/specs/telemetry.md +++ b/specs/telemetry.md @@ -23,6 +23,26 @@ Observability via Sentry: tracing, error context, and business metrics. All tele | `tracesSampleRate` | `1.0` (every transaction traced) | | `enableLogs` | `true` (structured Sentry logs) | +### Global Attributes + +Set via `Sentry.getGlobalScope().setAttributes()`. These propagate automatically to all metrics and spans. + +| Attribute | Set when | Value | +|-----------|----------|-------| +| `warden.source` | `initSentry()` | `github-action` or `cli` | +| `warden.repository` | After context built | `owner/repo` (e.g. 
`getsentry/sentry`) | + +### Trace ID + +The trace ID from the root span serves as the unique run identifier. It is surfaced in: + +- **CLI summary** (`-v`): Dimmed `Trace: {id}` line in the SUMMARY section at Verbose+ verbosity +- **CLI debug output** (`-vv`): `reporter.debug()` at the start of the command span (safety net if run crashes before summary) +- **Sentry structured logs**: `trace.id` field in the `Workflow initialized` log entry +- **JSONL run metadata**: `traceId` field in `JsonlRunMetadata` + +Operators can use the trace ID to locate the corresponding Sentry trace for any Warden run. + ### Integrations | Integration | Purpose | @@ -229,16 +249,26 @@ Untagged `captureException` calls exist at top-level catch handlers in `src/cli/ Emitted via `Sentry.metrics.*`. Each function is a no-op when Sentry is not initialized and wrapped in try/catch so metrics never break the workflow. +All metrics inherit `warden.source` and `warden.repository` from the global scope (see **Global Attributes** above). Only per-metric attributes are listed below. + +### Run count (`emitRunMetric`) + +| Metric | Type | Per-metric attributes | +|--------|------|-----------------------| +| `workflow.runs` | count | -- (inherits globals) | + +Called once per analysis workflow execution (CLI run or GitHub Action workflow). 
+ ### Skill-level (`emitSkillMetrics`) -| Metric | Type | Attributes | -|--------|------|------------| -| `skill.duration` | distribution (ms) | `skill`, `repository`, `source` | -| `tokens.input` | distribution | `skill`, `repository`, `source` | -| `tokens.output` | distribution | `skill`, `repository`, `source` | -| `cost.usd` | distribution | `skill`, `repository`, `source` | -| `findings.total` | count | `skill`, `repository`, `source` | -| `findings` | count | `skill`, `repository`, `source`, `severity` | +| Metric | Type | Per-metric attributes | +|--------|------|-----------------------| +| `skill.duration` | distribution (ms) | `skill` | +| `tokens.input` | distribution | `skill` | +| `tokens.output` | distribution | `skill` | +| `cost.usd` | distribution | `skill` | +| `findings.total` | count | `skill` | +| `findings` | count | `skill`, `severity` | ### Extraction (`emitExtractionMetrics`) @@ -305,7 +335,7 @@ Called from `evaluateFixesAndResolveStale` when stale comments are resolved. 
| File | Role | |------|------| -| `src/sentry.ts` | Init, integrations, metric emission functions | +| `src/sentry.ts` | Init, integrations, global attributes, metric emission functions | | `src/sdk/analyze.ts` | `executeQuery` (gen AI span), `analyzeFile` / `analyzeHunk` (workflow spans), extraction + retry + dedup metrics | | `src/action/fix-evaluation/index.ts` | `evaluateFixAttempts` / per-comment spans, fix eval metrics | | `src/action/workflow/base.ts` | `ActionFailedError` sentinel, `setFailed()` | diff --git a/src/action/workflow/pr-workflow.ts b/src/action/workflow/pr-workflow.ts index b384911..fe46163 100644 --- a/src/action/workflow/pr-workflow.ts +++ b/src/action/workflow/pr-workflow.ts @@ -7,7 +7,7 @@ import { readFileSync } from 'node:fs'; import { dirname, join } from 'node:path'; import type { Octokit } from '@octokit/rest'; -import { Sentry, logger, emitStaleResolutionMetric } from '../../sentry.js'; +import { Sentry, logger, emitStaleResolutionMetric, setGlobalAttributes, emitRunMetric } from '../../sentry.js'; import { loadWardenConfig, resolveSkillConfigs } from '../../config/loader.js'; import type { ResolvedTrigger } from '../../config/loader.js'; import type { WardenConfig } from '../../config/schema.js'; @@ -631,7 +631,14 @@ export async function runPRWorkflow( }); } - logger.info('Workflow initialized', { 'trigger.count': matchedTriggers.length }); + setGlobalAttributes({ 'warden.repository': context.repository.fullName }); + emitRunMetric(); + + const traceId = span.spanContext().traceId; + logger.info('Workflow initialized', { + 'trigger.count': matchedTriggers.length, + 'trace.id': traceId, + }); if (matchedTriggers.length === 0) { await cleanupOrphanedComments(octokit, context, inputs.anthropicApiKey); diff --git a/src/cli/main.ts b/src/cli/main.ts index 482f5a7..74d70f0 100644 --- a/src/cli/main.ts +++ b/src/cli/main.ts @@ -1,7 +1,7 @@ import { existsSync } from 'node:fs'; import { dirname, join, resolve } from 'node:path'; import { 
config as dotenvConfig } from 'dotenv'; -import { Sentry, flushSentry } from '../sentry.js'; +import { Sentry, flushSentry, setGlobalAttributes, emitRunMetric, getTraceId } from '../sentry.js'; import { loadWardenConfig, resolveSkillConfigs } from '../config/loader.js'; import type { SkillRunnerOptions } from '../sdk/runner.js'; import { resolveSkillAsync } from '../skills/loader.js'; @@ -138,14 +138,15 @@ async function outputResultsAndHandleFixes( const { reports, filteredReports, hasFailure, failureReasons } = processed; // Write JSONL output if requested (uses unfiltered reports for complete data) + const traceId = getTraceId(); if (options.output) { - writeJsonlReport(options.output, reports, totalDuration); + writeJsonlReport(options.output, reports, totalDuration, { traceId }); reporter.success(`Wrote JSONL output to ${options.output}`); } // Always write automatic run log for debugging const runLogPath = getRunLogPath(repoPath); - writeJsonlReport(runLogPath, reports, totalDuration); + writeJsonlReport(runLogPath, reports, totalDuration, { traceId }); reporter.debug(`Run log: ${runLogPath}`); // Collect fixable findings early so we know whether to suppress diffs in the report @@ -174,7 +175,7 @@ async function outputResultsAndHandleFixes( // Show summary (uses filtered reports for display) reporter.blank(); - reporter.renderSummary(filteredReports, totalDuration); + reporter.renderSummary(filteredReports, totalDuration, { traceId }); // Handle fixes: --fix (automatic) always runs, interactive step-through in TTY mode if (fixableFindings.length > 0) { @@ -267,6 +268,10 @@ async function runSkills( skillsToRun = []; } + // Set global telemetry context and emit run metric + setGlobalAttributes({ 'warden.repository': context.repository.fullName }); + emitRunMetric(); + // Handle case where no skills to run if (skillsToRun.length === 0) { if (options.json) { @@ -488,6 +493,10 @@ async function runConfigMode(options: CLIOptions, reporter: Reporter): Promise 
matchTrigger(t, context, 'local')); @@ -696,6 +705,11 @@ export async function main(): Promise { async (span) => { span.setAttribute('cli.command', command); + const traceId = getTraceId(); + if (traceId) { + reporter.debug(`Trace ID: ${traceId}`); + } + switch (command) { case 'init': return runInit(options, reporter); diff --git a/src/cli/output/jsonl.ts b/src/cli/output/jsonl.ts index ba254c8..1e7cd56 100644 --- a/src/cli/output/jsonl.ts +++ b/src/cli/output/jsonl.ts @@ -43,6 +43,7 @@ export interface JsonlRunMetadata { timestamp: string; durationMs: number; cwd: string; + traceId?: string; } /** @@ -125,7 +126,8 @@ function aggregateUsage(reports: SkillReport[]): UsageStats | undefined { export function writeJsonlReport( outputPath: string, reports: SkillReport[], - durationMs: number + durationMs: number, + options?: { traceId?: string } ): void { const resolvedPath = resolve(process.cwd(), outputPath); const timestamp = new Date().toISOString(); @@ -135,6 +137,7 @@ export function writeJsonlReport( timestamp, durationMs, cwd, + traceId: options?.traceId, }; const lines: string[] = []; diff --git a/src/cli/output/reporter.ts b/src/cli/output/reporter.ts index 2eed27b..6d792b6 100644 --- a/src/cli/output/reporter.ts +++ b/src/cli/output/reporter.ts @@ -185,7 +185,7 @@ export class Reporter { /** * Render the summary section. 
*/ - renderSummary(reports: SkillReport[], totalDuration: number): void { + renderSummary(reports: SkillReport[], totalDuration: number, options?: { traceId?: string }): void { const allFindings: Finding[] = []; let totalFailedHunks = 0; let totalFailedExtractions = 0; @@ -227,6 +227,9 @@ export class Reporter { } else { this.log(chalk.dim(durationLine)); } + if (options?.traceId && this.verbosity >= Verbosity.Verbose) { + this.log(chalk.dim(`Trace: ${options.traceId}`)); + } } else { this.logPlain(`Summary: ${formatFindingCountsPlain(counts)}`); if (totalFailedHunks > 0) { @@ -245,6 +248,9 @@ export class Reporter { this.logPlain(`Usage: ${formatUsagePlain(totalUsage)}`); } this.logPlain(`Total time: ${formatDuration(totalDuration)}`); + if (options?.traceId && this.verbosity >= Verbosity.Verbose) { + this.logPlain(`Trace: ${options.traceId}`); + } } } diff --git a/src/cli/output/tasks.ts b/src/cli/output/tasks.ts index a3258dc..5d7577c 100644 --- a/src/cli/output/tasks.ts +++ b/src/cli/output/tasks.ts @@ -432,9 +432,7 @@ export async function runSkillTask( } // Emit metrics and log completion - emitSkillMetrics(report, { - repository: context.repository.fullName, - }); + emitSkillMetrics(report); logger.info(logger.fmt`Skill execution complete: ${displayName}`, { 'finding.count': report.findings.length, 'duration_ms': report.durationMs, diff --git a/src/sentry.ts b/src/sentry.ts index d9766af..3bcaf51 100644 --- a/src/sentry.ts +++ b/src/sentry.ts @@ -6,7 +6,6 @@ import { getVersion } from './utils/index.js'; export type SentryContext = 'cli' | 'action'; let initialized = false; -let deploymentContext: SentryContext | undefined; export function initSentry(context: SentryContext): void { const dsn = process.env['WARDEN_SENTRY_DSN']; @@ -26,14 +25,41 @@ export function initSentry(context: SentryContext): void { ], }); - deploymentContext = context; - Sentry.setTag('deployment.context', context); Sentry.setTag('service.version', getVersion()); + 
Sentry.getGlobalScope().setAttributes({ + 'warden.source': context === 'action' ? 'github-action' : 'cli', + }); } export { Sentry }; export const { logger } = Sentry; +/** + * Set attributes on the global Sentry scope. + * These automatically apply to ALL metrics and spans. + */ +export function setGlobalAttributes(attrs: Record<string, string>): void { + if (!initialized) return; + try { + Sentry.getGlobalScope().setAttributes(attrs); + } catch { + // Never break the workflow + } +} + +/** + * Get the trace ID from the active span, if available. + * Useful for correlating runs to Sentry traces in logs and output. + */ +export function getTraceId(): string | undefined { + if (!initialized) return undefined; + try { + return Sentry.getActiveSpan()?.spanContext().traceId; + } catch { + return undefined; + } +} + /** * Run a metrics callback only when Sentry is initialized. * Swallows errors so metrics never break the main workflow. @@ -47,20 +73,19 @@ function safeEmit(fn: () => void): void { } } -export interface SkillMetricsContext { - /** Full repository name (e.g. "owner/repo") */ - repository?: string; +/** + * Emit a single run count. Call once per analysis workflow execution. + * Inherits warden.source and warden.repository from global scope. + */ +export function emitRunMetric(): void { + safeEmit(() => { + Sentry.metrics.count('workflow.runs', 1); + }); } -export function emitSkillMetrics(report: SkillReport, context?: SkillMetricsContext): void { +export function emitSkillMetrics(report: SkillReport): void { safeEmit(() => { const attrs: Record<string, string> = { skill: report.skill }; - if (context?.repository) { - attrs['repository'] = context.repository; - } - if (deploymentContext) { - attrs['source'] = deploymentContext; - } Sentry.metrics.distribution('skill.duration', report.durationMs ?? 0, { unit: 'millisecond',