Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 173 additions & 0 deletions pkg/llmproxy/benchmarks/client.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
// Package benchmarks provides integration with tokenledger for dynamic benchmark data.
//
// This enables cliproxy++ to use real-time benchmark data from:
// - Artificial Analysis API (intelligence, speed, latency)
// - OpenRouter API (pricing, context)
// - CLIProxyAPI metrics (runtime performance)
package benchmarks

import (
"encoding/json"
"fmt"
"sync"
"time"
)

// BenchmarkData represents benchmark data for a single model, aggregated
// from sources such as Artificial Analysis, OpenRouter, and runtime metrics.
//
// Pointer fields are nil when the source did not report that metric; the
// ",omitempty" tags drop absent values from the serialized JSON.
type BenchmarkData struct {
	ModelID  string `json:"model_id"`
	Provider string `json:"provider,omitempty"`
	// IntelligenceIndex/CodingIndex appear to be 0-100 scores —
	// GetQualityScore divides by 100 to normalize; TODO confirm scale.
	IntelligenceIndex *float64 `json:"intelligence_index,omitempty"`
	CodingIndex       *float64 `json:"coding_index,omitempty"`
	SpeedTPS          *float64 `json:"speed_tps,omitempty"`       // throughput, tokens per second
	LatencyTTFTMs     *float64 `json:"latency_ttft_ms,omitempty"` // time to first token, milliseconds
	// Prices are quoted per 1M tokens, per the JSON field names.
	InputPricePer1M  *float64 `json:"price_input_per_1m,omitempty"`
	OutputPricePer1M *float64 `json:"price_output_per_1m,omitempty"`
	ContextWindow    *int64   `json:"context_window_tokens,omitempty"`
	// Confidence rates how trustworthy this record is — presumably in
	// [0,1]; verify against the producer.
	Confidence float64 `json:"confidence"`
	Source     string  `json:"source"` // which upstream produced this record
}

// Client fetches benchmarks from tokenledger and caches results in memory.
//
// Client contains a mutex: use it via pointer and do not copy it after
// first use.
type Client struct {
	tokenledgerPath string                 // path to the tokenledger binary (unused by the current stub fetcher)
	cache           map[string]*cacheEntry // model ID -> cached benchmark; guarded by cacheMu
	cacheMu         sync.RWMutex           // guards cache
	cacheTTL        time.Duration          // freshness window for cache entries
}

// cacheEntry is one cached benchmark record together with its expiry
// deadline. Entries past expires are stale but retained as a fallback
// when a fresh fetch fails (see GetBenchmark).
type cacheEntry struct {
	data    BenchmarkData // cached copy of the benchmark record
	expires time.Time     // instant after which the entry is stale
}

// NewClient returns a Client that consults the tokenledger binary at
// tokenledgerPath, caching results for one hour.
func NewClient(tokenledgerPath string) *Client {
	c := &Client{
		tokenledgerPath: tokenledgerPath,
		cacheTTL:        time.Hour,
	}
	c.cache = make(map[string]*cacheEntry)
	return c
}

// GetBenchmark returns benchmark data for a model, consulting a TTL
// cache before calling out to tokenledger.
//
// On fetch failure a stale (expired) cache entry is served as a
// graceful fallback; the error is returned only when no cached data
// exists at all.
//
// FIX: cache hits now return a copy of the cached record instead of a
// pointer into the live cache entry, so callers can no longer mutate
// shared cache state through the returned *BenchmarkData.
func (c *Client) GetBenchmark(modelID string) (*BenchmarkData, error) {
	// Fast path: fresh cache hit under the read lock.
	c.cacheMu.RLock()
	if entry, ok := c.cache[modelID]; ok && time.Now().Before(entry.expires) {
		d := entry.data // copy out so callers cannot alias cache state
		c.cacheMu.RUnlock()
		return &d, nil
	}
	c.cacheMu.RUnlock()

	// Fetch fresh data. Concurrent callers for the same model may each
	// fetch; the last writer wins, which is acceptable for benchmarks.
	data, err := c.fetchFromTokenledger(modelID)
	if err != nil {
		// Degrade gracefully: serve stale cached data if any exists.
		c.cacheMu.RLock()
		if entry, ok := c.cache[modelID]; ok {
			d := entry.data
			c.cacheMu.RUnlock()
			return &d, nil
		}
		c.cacheMu.RUnlock()
		return nil, err
	}

	// Cache the fresh result under the write lock.
	c.cacheMu.Lock()
	c.cache[modelID] = &cacheEntry{
		data:    *data,
		expires: time.Now().Add(c.cacheTTL),
	}
	c.cacheMu.Unlock()

	return data, nil
}

// fetchFromTokenledger calls the tokenledger CLI to get benchmark data
// for modelID.
//
// The CLI integration is not yet implemented (it would exec the Rust
// binary at c.tokenledgerPath); until then this always returns an error
// so callers fall back to hardcoded values.
func (c *Client) fetchFromTokenledger(modelID string) (*BenchmarkData, error) {
	// Call tokenledger CLI (would be implemented in Rust binary)
	// For now, return nil to use fallback hardcoded values
	return nil, fmt.Errorf("tokenledger not configured")
}

// GetAllBenchmarks returns all available benchmark data.
//
// Not yet implemented: it always returns an error until the tokenledger
// integration lands, mirroring fetchFromTokenledger.
func (c *Client) GetAllBenchmarks() ([]BenchmarkData, error) {
	// This would call tokenledger to get all benchmarks
	return nil, fmt.Errorf("tokenledger not configured")
}

// RefreshBenchmarks forces a refresh of benchmark data by dropping every
// cached entry; subsequent GetBenchmark calls will re-fetch.
func (c *Client) RefreshBenchmarks() error {
	c.cacheMu.Lock()
	defer c.cacheMu.Unlock()
	c.cache = make(map[string]*cacheEntry)

	// Would trigger tokenledger to fetch fresh data
	return nil
}

// Refresh delegates to RefreshBenchmarks.
//
// FIX: added so *Client satisfies the BenchmarkProvider interface, which
// requires Refresh(); previously MockProvider satisfied the interface
// but Client (with GetBenchmark and GetAllBenchmarks already matching)
// did not.
func (c *Client) Refresh() error {
	return c.RefreshBenchmarks()
}

// GetQualityScore reports the model's intelligence index normalized to
// the [0,1] range, and whether a score was available.
func (c *Client) GetQualityScore(modelID string) (float64, bool) {
	data, err := c.GetBenchmark(modelID)
	if err != nil {
		return 0, false
	}
	if data == nil || data.IntelligenceIndex == nil {
		return 0, false
	}
	// The raw index is on a 0-100 scale; scale down to 0-1.
	return *data.IntelligenceIndex / 100.0, true
}

// GetCost returns the input cost per 1K tokens (USD) for a model, and
// whether pricing data was available.
//
// FIX: InputPricePer1M is quoted per 1M tokens (see the JSON tag
// price_input_per_1m), so it is divided by 1000 to honor this method's
// per-1K contract and match the scale of the hardcoded fallback tables
// (CostPer1kProxy). Previously the per-1M figure was returned raw —
// 1000x too large.
func (c *Client) GetCost(modelID string) (float64, bool) {
	data, err := c.GetBenchmark(modelID)
	if err != nil || data == nil || data.InputPricePer1M == nil {
		return 0, false
	}
	return *data.InputPricePer1M / 1000.0, true
}

// GetLatency returns the model's time-to-first-token latency, truncated
// to whole milliseconds, and whether latency data was available.
func (c *Client) GetLatency(modelID string) (int, bool) {
	data, err := c.GetBenchmark(modelID)
	if err != nil {
		return 0, false
	}
	if data == nil || data.LatencyTTFTMs == nil {
		return 0, false
	}
	ms := int(*data.LatencyTTFTMs) // truncates fractional milliseconds
	return ms, true
}

// BenchmarkProvider defines the interface for benchmark sources
// (e.g. tokenledger-backed clients or test doubles such as MockProvider).
type BenchmarkProvider interface {
	// GetBenchmark returns data for one model, or an error if unavailable.
	GetBenchmark(modelID string) (*BenchmarkData, error)
	// GetAllBenchmarks returns every known model's data.
	GetAllBenchmarks() ([]BenchmarkData, error)
	// Refresh forces the source to re-fetch its data.
	Refresh() error
}

// MockProvider is a stateless BenchmarkProvider stand-in whose lookup
// methods are intentionally unimplemented, which steers callers onto
// the hardcoded fallback data.
type MockProvider struct{}

// NewMockProvider creates a provider with fallback data.
func NewMockProvider() *MockProvider {
	var p MockProvider
	return &p
}

// GetBenchmark implements BenchmarkProvider. It always fails; callers
// are expected to fall back to hardcoded values.
func (p *MockProvider) GetBenchmark(modelID string) (*BenchmarkData, error) {
	return nil, fmt.Errorf("not implemented")
}

// GetAllBenchmarks implements BenchmarkProvider. It always fails;
// callers are expected to fall back to hardcoded values.
func (p *MockProvider) GetAllBenchmarks() ([]BenchmarkData, error) {
	return nil, fmt.Errorf("not implemented")
}

// Refresh implements BenchmarkProvider. It is a no-op for the mock and
// always succeeds.
func (p *MockProvider) Refresh() error {
	return nil
}

// MarshalJSON implements json.Marshaler for BenchmarkData.
//
// The local Alias type has BenchmarkData's fields but not its method
// set, which stops json.Marshal from re-invoking this method and
// recursing infinitely. No fields are added or transformed, so the
// output currently matches default struct marshaling — NOTE(review):
// this method exists only as an extension point; confirm it is needed.
func (b *BenchmarkData) MarshalJSON() ([]byte, error) {
	type Alias BenchmarkData
	return json.Marshal(&struct {
		*Alias
	}{
		Alias: (*Alias)(b),
	})
}
182 changes: 182 additions & 0 deletions pkg/llmproxy/benchmarks/unified.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
// Package benchmarks provides unified benchmark access with fallback to hardcoded values.
// This integrates with tokenledger for dynamic data while maintaining backward compatibility.
package benchmarks

import (
"fmt"
"sync"
)

// UnifiedBenchmarkStore combines dynamic tokenledger data with hardcoded
// fallbacks: each getter tries the primary provider first, then falls
// back to the static tables when the primary is absent or lacks data.
type UnifiedBenchmarkStore struct {
	primary   BenchmarkProvider // optional dynamic source; may be nil
	fallbacks *FallbackProvider // static tables; set by both constructors
	// mu is currently unused — no method in this file locks it, and both
	// fields are written only at construction. NOTE(review): remove it,
	// or use it if providers ever become swappable at runtime.
	mu sync.RWMutex
}

// FallbackProvider provides hardcoded benchmark values keyed by model ID.
type FallbackProvider struct {
	// QualityProxy maps known model IDs to their quality scores in [0,1].
	QualityProxy map[string]float64
	// CostPer1kProxy maps model IDs to estimated cost per 1k tokens (USD).
	CostPer1kProxy map[string]float64
	// LatencyMsProxy maps model IDs to estimated p50 latency in milliseconds.
	LatencyMsProxy map[string]int
}

// DefaultFallbackProvider returns the hardcoded maps from pareto_router.go.
//
// All values are static, hand-maintained estimates: quality in [0,1],
// cost in USD per 1k tokens, latency in milliseconds. NOTE(review):
// keep the three maps' key sets in sync when adding or removing models.
func DefaultFallbackProvider() *FallbackProvider {
	return &FallbackProvider{
		// Quality scores, normalized to [0,1].
		QualityProxy: map[string]float64{
			"claude-opus-4.6":               0.95,
			"claude-opus-4.6-1m":            0.96,
			"claude-sonnet-4.6":             0.88,
			"claude-haiku-4.5":              0.75,
			"gpt-5.3-codex-high":            0.92,
			"gpt-5.3-codex":                 0.82,
			"claude-4.5-opus-high-thinking": 0.94,
			"claude-4.5-opus-high":          0.92,
			"claude-4.5-sonnet-thinking":    0.85,
			"claude-4-sonnet":               0.80,
			"gpt-4o":                        0.85,
			"gpt-5.1-codex":                 0.80,
			"gemini-3-flash":                0.78,
			"gemini-3.1-pro":                0.90,
			"gemini-2.5-flash":              0.76,
			"gemini-2.0-flash":              0.72,
			"glm-5":                         0.78,
			"minimax-m2.5":                  0.75,
			"deepseek-v3.2":                 0.80,
			"composer-1.5":                  0.82,
			"composer-1":                    0.78,
			"roo-default":                   0.70,
			"kilo-default":                  0.70,
		},
		// Estimated cost in USD per 1k tokens (e.g. 0.015 == $15 per 1M).
		CostPer1kProxy: map[string]float64{
			"claude-opus-4.6":               0.015,
			"claude-opus-4.6-1m":            0.015,
			"claude-sonnet-4.6":             0.003,
			"claude-haiku-4.5":              0.00025,
			"gpt-5.3-codex-high":            0.020,
			"gpt-5.3-codex":                 0.010,
			"claude-4.5-opus-high-thinking": 0.025,
			"claude-4.5-opus-high":          0.015,
			"claude-4.5-sonnet-thinking":    0.005,
			"claude-4-sonnet":               0.003,
			"gpt-4o":                        0.005,
			"gpt-5.1-codex":                 0.008,
			"gemini-3-flash":                0.00015,
			"gemini-2.5-flash":              0.0001,
			"gemini-3.1-pro":                0.007,
			"gemini-2.0-flash":              0.0001,
			"glm-5":                         0.001,
			"minimax-m2.5":                  0.001,
			"deepseek-v3.2":                 0.0005,
			"composer-1.5":                  0.002,
			"composer-1":                    0.001,
			"roo-default":                   0.0, // self-hosted defaults carry no metered cost
			"kilo-default":                  0.0,
		},
		// Estimated p50 latency, milliseconds.
		LatencyMsProxy: map[string]int{
			"claude-opus-4.6":               4000,
			"claude-opus-4.6-1m":            5000,
			"claude-sonnet-4.6":             2000,
			"claude-haiku-4.5":              800,
			"gpt-5.3-codex-high":            6000,
			"gpt-5.3-codex":                 3000,
			"claude-4.5-opus-high-thinking": 8000,
			"claude-4.5-opus-high":          5000,
			"claude-4.5-sonnet-thinking":    4000,
			"claude-4-sonnet":               2500,
			"gpt-4o":                        2000,
			"gpt-5.1-codex":                 3000,
			"gemini-3-flash":                600,
			"gemini-3.1-pro":                3000,
			"gemini-2.5-flash":              500,
			"gemini-2.0-flash":              400,
			"glm-5":                         1500,
			"minimax-m2.5":                  1200,
			"deepseek-v3.2":                 1000,
			"composer-1.5":                  2000,
			"composer-1":                    1500,
			"roo-default":                   1000,
			"kilo-default":                  1000,
		},
	}
}

// NewUnifiedStore creates a store that consults primary first and falls
// back to the default hardcoded tables.
func NewUnifiedStore(primary BenchmarkProvider) *UnifiedBenchmarkStore {
	s := &UnifiedBenchmarkStore{primary: primary}
	s.fallbacks = DefaultFallbackProvider()
	return s
}

// NewFallbackOnlyStore creates a store backed solely by the hardcoded
// fallback tables (no primary provider).
func NewFallbackOnlyStore() *UnifiedBenchmarkStore {
	// A nil primary makes every getter skip straight to the fallbacks.
	return NewUnifiedStore(nil)
}

// GetQuality returns the quality score in [0,1] for modelID and whether
// one was found, preferring the primary (tokenledger) provider over the
// hardcoded fallback table.
func (s *UnifiedBenchmarkStore) GetQuality(modelID string) (float64, bool) {
	if s.primary != nil {
		data, err := s.primary.GetBenchmark(modelID)
		if err == nil && data != nil && data.IntelligenceIndex != nil {
			// The primary reports a 0-100 index; normalize to [0,1].
			return *data.IntelligenceIndex / 100.0, true
		}
	}

	q, ok := s.fallbacks.QualityProxy[modelID]
	return q, ok
}

// GetCost returns the cost per 1K tokens (USD) for modelID and whether
// pricing was found, trying the primary provider before the hardcoded
// fallback table.
//
// FIX: the primary's InputPricePer1M is quoted per 1M tokens (JSON tag
// price_input_per_1m), while this method's contract and the
// CostPer1kProxy fallback are per 1K — divide by 1000 so both paths
// report the same unit. Previously the primary path was 1000x too large
// relative to the fallback path.
func (s *UnifiedBenchmarkStore) GetCost(modelID string) (float64, bool) {
	if s.primary != nil {
		if data, err := s.primary.GetBenchmark(modelID); err == nil && data != nil && data.InputPricePer1M != nil {
			return *data.InputPricePer1M / 1000.0, true
		}
	}

	if c, ok := s.fallbacks.CostPer1kProxy[modelID]; ok {
		return c, true
	}
	return 0, false
}

// GetLatency returns the latency in milliseconds for modelID and whether
// a figure was found, preferring primary data over the hardcoded
// fallback table.
func (s *UnifiedBenchmarkStore) GetLatency(modelID string) (int, bool) {
	if s.primary != nil {
		data, err := s.primary.GetBenchmark(modelID)
		if err == nil && data != nil && data.LatencyTTFTMs != nil {
			// Truncate fractional milliseconds.
			return int(*data.LatencyTTFTMs), true
		}
	}

	l, ok := s.fallbacks.LatencyMsProxy[modelID]
	return l, ok
}

// GetAll returns all benchmark data from the primary provider. The
// hardcoded fallback tables are not enumerated, so it fails when no
// primary is configured.
func (s *UnifiedBenchmarkStore) GetAll() ([]BenchmarkData, error) {
	if s.primary != nil {
		return s.primary.GetAllBenchmarks()
	}
	return nil, fmt.Errorf("no primary provider configured")
}

// Refresh triggers a refresh of benchmark data on the primary provider.
// With no primary configured it is a no-op and succeeds.
func (s *UnifiedBenchmarkStore) Refresh() error {
	if s.primary == nil {
		return nil
	}
	return s.primary.Refresh()
}
Loading
Loading