Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions pkg/cli/audit_report.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,13 @@ type OverviewData struct {

// MetricsData contains execution metrics
type MetricsData struct {
TokenUsage int `json:"token_usage,omitempty" console:"header:Token Usage,format:number,omitempty"`
EstimatedCost float64 `json:"estimated_cost,omitempty" console:"header:Estimated Cost,format:cost,omitempty"`
ActionMinutes float64 `json:"action_minutes,omitempty" console:"header:Action Minutes,omitempty"`
Turns int `json:"turns,omitempty" console:"header:Turns,omitempty"`
ErrorCount int `json:"error_count" console:"header:Errors"`
WarningCount int `json:"warning_count" console:"header:Warnings"`
TokenUsage int `json:"token_usage,omitempty" console:"header:Token Usage,format:number,omitempty"`
EffectiveTokens int `json:"effective_tokens,omitempty" console:"header:Effective Tokens,format:number,omitempty"`
EstimatedCost float64 `json:"estimated_cost,omitempty" console:"header:Estimated Cost,format:cost,omitempty"`
ActionMinutes float64 `json:"action_minutes,omitempty" console:"header:Action Minutes,omitempty"`
Turns int `json:"turns,omitempty" console:"header:Turns,omitempty"`
ErrorCount int `json:"error_count" console:"header:Errors"`
WarningCount int `json:"warning_count" console:"header:Warnings"`
}

// JobData contains information about individual jobs
Expand Down Expand Up @@ -276,6 +277,14 @@ func buildAuditData(processedRun ProcessedRun, metrics LogMetrics, mcpToolUsage
WarningCount: run.WarningCount,
}

// Populate effective tokens from the firewall proxy summary when available,
// otherwise fall back to the effective tokens stored on the run itself.
if processedRun.TokenUsage != nil && processedRun.TokenUsage.TotalEffectiveTokens > 0 {
metricsData.EffectiveTokens = processedRun.TokenUsage.TotalEffectiveTokens
} else if run.EffectiveTokens > 0 {
metricsData.EffectiveTokens = run.EffectiveTokens
}

// Populate ActionMinutes from run duration so it is always visible even
// when token/turn metrics are zero (e.g. Codex runs that exit early).
// Use math.Ceil to match the billable-minute rounding used elsewhere.
Expand Down
44 changes: 44 additions & 0 deletions pkg/cli/data/model_multipliers.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"version": "1",
"description": "Effective Tokens (ET) computation data per the gh-aw Effective Tokens Specification v0.2.0. Token class weights are applied first to normalize across token classes, then the per-model multiplier scales the result relative to the reference model.",
"reference_model": "claude-sonnet-4.5",
"token_class_weights": {
"input": 1.0,
"cached_input": 0.1,
"output": 4.0,
"reasoning": 4.0,
"cache_write": 1.0
},
"multipliers": {
"claude-haiku-4.5": 0.1,
"claude-3-5-haiku": 0.1,
"claude-3-haiku": 0.1,
"claude-sonnet-4.5": 1.0,
"claude-sonnet-4.6": 1.0,
"claude-3-5-sonnet": 1.0,
"claude-3-7-sonnet": 1.0,
"claude-3-sonnet": 1.0,
"claude-opus-4.5": 5.0,
"claude-opus-4.6": 5.0,
"claude-3-5-opus": 5.0,
"claude-3-opus": 5.0,
"gpt-4o": 1.0,
"gpt-4o-mini": 0.1,
"gpt-4.1": 1.0,
"gpt-4.1-mini": 0.1,
"gpt-4.1-nano": 0.05,
"gpt-4-turbo": 1.0,
"gpt-4": 1.0,
"o1": 3.0,
"o1-mini": 0.5,
"o1-pro": 10.0,
"o3": 3.0,
"o3-mini": 0.5,
"o4-mini": 0.5,
"gemini-2.5-pro": 1.0,
"gemini-2.5-flash": 0.2,
"gemini-2.0-flash": 0.1,
"gemini-1.5-pro": 1.0,
"gemini-1.5-flash": 0.1
}
}
223 changes: 223 additions & 0 deletions pkg/cli/effective_tokens.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
package cli

// This file provides command-line interface functionality for gh-aw.
// This file (effective_tokens.go) implements the Effective Tokens (ET) specification
// defined in docs/src/content/docs/reference/effective-tokens-specification.md.
//
// Effective Tokens normalize raw token counts across token classes and model pricing
// using the formula:
//
//	base_weighted_tokens = (w_in × I) + (w_cache × C) + (w_out × O) + (w_reason × R) + (w_write × W)
//	effective_tokens = m × base_weighted_tokens
//
// where:
//   - I = input tokens (w_in = 1.0 default)
//   - C = cached input tokens (w_cache = 0.1 default)
//   - O = output tokens (w_out = 4.0 default)
//   - R = reasoning tokens (w_reason = 4.0 default; not tracked separately yet, so 0)
//   - W = cache-write tokens (w_write = 1.0 default)
//   - m = per-model multiplier relative to the reference model
//
// Token class weights and model multipliers are loaded from the embedded
// data/model_multipliers.json file, which is compiled into the binary via
// go:embed; updates to that file require recompilation.
//
Comment on lines +21 to +22
Copy link

Copilot AI Apr 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment says the embedded data/model_multipliers.json "can be updated without recompilation", but //go:embed bakes the file into the binary, so updates require rebuilding. Either adjust the comment to reflect that, or implement an override mechanism (e.g., optional external file/env var) if runtime updates are intended.

Suggested change
// data/model_multipliers.json file and can be updated without recompilation.
//
// data/model_multipliers.json file, which is compiled into the binary. Updates
// to this file require recompilation.

Copilot uses AI. Check for mistakes.
// Key responsibilities:
// - Embedding model_multipliers.json at compile time
// - Applying token class weights before the model multiplier
// - Providing model multiplier lookup with prefix matching for model variants
// - Computing effective tokens from raw per-model token usage data
// - Populating effective token counts on TokenUsageSummary after parsing

import (
	_ "embed"
	"encoding/json"
	"math"
	"strings"
	"sync"

	"github.com/github/gh-aw/pkg/logger"
)

// effectiveTokensLog is the namespaced debug logger for this file's
// effective-token computations.
var effectiveTokensLog = logger.New("cli:effective_tokens")

// modelMultipliersJSON holds the raw bytes of data/model_multipliers.json,
// baked into the binary at compile time by the go:embed directive below.
//go:embed data/model_multipliers.json
var modelMultipliersJSON []byte

// tokenClassWeights holds the per-token-class weight values from the specification.
// Each weight scales its raw token count when computing base weighted tokens
// (see computeBaseWeightedTokens); zero-valued fields are replaced with the
// specification defaults at load time.
type tokenClassWeights struct {
	Input       float64 `json:"input"`        // w_in: fresh input tokens (default 1.0)
	CachedInput float64 `json:"cached_input"` // w_cache: cache-read input tokens (default 0.1)
	Output      float64 `json:"output"`       // w_out: output tokens (default 4.0)
	Reasoning   float64 `json:"reasoning"`    // w_reason: reasoning tokens (default 4.0)
	CacheWrite  float64 `json:"cache_write"`  // w_write: cache-write tokens (default 1.0)
}

// modelMultipliersData is the top-level structure of model_multipliers.json.
type modelMultipliersData struct {
	Version           string             `json:"version"`             // data-format version string
	Description       string             `json:"description"`         // human-readable provenance note
	ReferenceModel    string             `json:"reference_model"`     // model whose multiplier is 1.0
	TokenClassWeights tokenClassWeights  `json:"token_class_weights"` // per-class weights applied before the multiplier
	Multipliers       map[string]float64 `json:"multipliers"`         // per-model multiplier relative to ReferenceModel
}

// loadedMultipliers is the parsed multiplier table, keyed by lowercase model name.
// Initialized once on first call to effectiveTokenMultiplier.
var loadedMultipliers map[string]float64

// loadedTokenWeights holds the token class weights from the JSON file.
// Initialized once on first call to initMultipliers.
var loadedTokenWeights tokenClassWeights

// initMultipliers parses the embedded JSON and populates loadedMultipliers and
// loadedTokenWeights. Safe to call multiple times; only initializes once.
func initMultipliers() {
if loadedMultipliers != nil {
return
}

Comment on lines +62 to +76
Copy link

Copilot AI Apr 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

loadedMultipliers / loadedTokenWeights are lazily initialized via a nil check without synchronization. Since token usage parsing runs concurrently (e.g., in downloadRunArtifactsConcurrent), this can cause data races and undefined behavior under -race (concurrent reads/writes to the globals and the map). Use sync.Once (or a mutex) to guard initialization and make reads safe, and provide a test-only reset hook if tests need to force re-init.

Copilot uses AI. Check for mistakes.
var data modelMultipliersData
if err := json.Unmarshal(modelMultipliersJSON, &data); err != nil {
effectiveTokensLog.Printf("Failed to parse model_multipliers.json: %v", err)
loadedMultipliers = make(map[string]float64)
loadedTokenWeights = defaultTokenClassWeights()
return
}

loadedMultipliers = make(map[string]float64, len(data.Multipliers))
for model, mult := range data.Multipliers {
loadedMultipliers[strings.ToLower(model)] = mult
}

// Fall back to default weights for any zero-valued field (zero means not set)
defaults := defaultTokenClassWeights()
loadedTokenWeights = data.TokenClassWeights
if loadedTokenWeights.Input == 0 {
loadedTokenWeights.Input = defaults.Input
}
if loadedTokenWeights.CachedInput == 0 {
loadedTokenWeights.CachedInput = defaults.CachedInput
}
if loadedTokenWeights.Output == 0 {
loadedTokenWeights.Output = defaults.Output
}
if loadedTokenWeights.Reasoning == 0 {
loadedTokenWeights.Reasoning = defaults.Reasoning
}
if loadedTokenWeights.CacheWrite == 0 {
loadedTokenWeights.CacheWrite = defaults.CacheWrite
}

effectiveTokensLog.Printf("Loaded %d model multipliers (reference: %s, w_in=%.1f w_cache=%.1f w_out=%.1f)",
len(loadedMultipliers), data.ReferenceModel,
loadedTokenWeights.Input, loadedTokenWeights.CachedInput, loadedTokenWeights.Output)
}

// defaultTokenClassWeights returns the specification-mandated default weights:
// w_in=1.0, w_cache=0.1, w_out=4.0, w_reason=4.0, w_write=1.0.
func defaultTokenClassWeights() tokenClassWeights {
	var w tokenClassWeights
	w.Input = 1.0
	w.CachedInput = 0.1
	w.Output = 4.0
	w.Reasoning = 4.0
	w.CacheWrite = 1.0
	return w
}

// effectiveTokenMultiplier returns the per-model cost multiplier for the given
// model name.
//
// Lookup order:
//  1. Exact case-insensitive match
//  2. Longest prefix match (e.g. "claude-sonnet-4.6-preview" → "claude-sonnet-4.6")
//  3. Default: 1.0 (unknown model treated as reference baseline)
func effectiveTokenMultiplier(model string) float64 {
	initMultipliers()

	key := strings.ToLower(strings.TrimSpace(model))
	if key == "" {
		return 1.0
	}

	// Fast path: the table contains the normalized name verbatim.
	if mult, ok := loadedMultipliers[key]; ok {
		return mult
	}

	// Slow path: scan for the longest table entry that prefixes the model name,
	// so dated/preview variants resolve to their base model.
	matched, matchedMult := "", 1.0
	for name, mult := range loadedMultipliers {
		if len(name) > len(matched) && strings.HasPrefix(key, name) {
			matched, matchedMult = name, mult
		}
	}
	if matched != "" {
		effectiveTokensLog.Printf("Model %q matched via prefix %q (multiplier=%.2f)", model, matched, matchedMult)
		return matchedMult
	}

	effectiveTokensLog.Printf("Unknown model %q, using default multiplier 1.0", model)
	return 1.0
}

// computeBaseWeightedTokens applies the per-token-class weights to the raw
// token counts of a single invocation and returns the weighted sum.
//
// Formula (from the ET specification):
//
//	base = (w_in × I) + (w_cache × C) + (w_out × O) + (w_reason × R) + (w_cache_write × W)
//
// Reasoning tokens (R) are not tracked separately yet and contribute 0.
func computeBaseWeightedTokens(inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens int) float64 {
	initMultipliers()

	weights := loadedTokenWeights
	// Accumulate in the same class order as the specification formula.
	total := weights.Input * float64(inputTokens)
	total += weights.CachedInput * float64(cacheReadTokens)
	total += weights.Output * float64(outputTokens)
	total += weights.CacheWrite * float64(cacheWriteTokens)
	return total
}

// computeModelEffectiveTokens returns the effective token count for a single
// model invocation.
//
// Formula (from the ET specification):
//
//	effective_tokens = m × base_weighted_tokens
//
// The result is rounded to the nearest integer; zero raw usage yields zero
// without performing a multiplier lookup.
func computeModelEffectiveTokens(model string, inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens int) int {
	base := computeBaseWeightedTokens(inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens)
	if base == 0 {
		return 0
	}
	return int(math.Round(effectiveTokenMultiplier(model) * base))
}

// populateEffectiveTokens fills in the EffectiveTokens field on each
// ModelTokenUsage entry and computes the TotalEffectiveTokens aggregate on the
// summary. It is a no-op when summary is nil; nil per-model entries are skipped.
func populateEffectiveTokens(summary *TokenUsageSummary) {
	if summary == nil {
		return
	}

	var total int
	for model, usage := range summary.ByModel {
		if usage == nil {
			continue
		}
		usage.EffectiveTokens = computeModelEffectiveTokens(
			model,
			usage.InputTokens,
			usage.OutputTokens,
			usage.CacheReadTokens,
			usage.CacheWriteTokens,
		)
		total += usage.EffectiveTokens
	}
	summary.TotalEffectiveTokens = total

	if effectiveTokensLog.Enabled() {
		effectiveTokensLog.Printf("Effective tokens: total=%d models=%d", total, len(summary.ByModel))
	}
}
Loading
Loading