Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 173 additions & 0 deletions pkg/llmproxy/benchmarks/client.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
// Package benchmarks provides integration with tokenledger for dynamic benchmark data.
//
// This enables cliproxy++ to use real-time benchmark data from:
// - Artificial Analysis API (intelligence, speed, latency)
// - OpenRouter API (pricing, context)
// - CLIProxyAPI metrics (runtime performance)
package benchmarks

import (
"encoding/json"
"fmt"
"sync"
"time"
)

// BenchmarkData represents benchmark data for a single model, aggregated
// from sources such as Artificial Analysis, OpenRouter, and runtime metrics.
//
// Pointer fields are nil when the source did not report that metric; the
// ",omitempty" tags drop absent values from the serialized JSON.
type BenchmarkData struct {
	ModelID  string `json:"model_id"`
	Provider string `json:"provider,omitempty"`
	// IntelligenceIndex/CodingIndex appear to be 0-100 scores —
	// GetQualityScore divides by 100 to normalize; TODO confirm scale.
	IntelligenceIndex *float64 `json:"intelligence_index,omitempty"`
	CodingIndex       *float64 `json:"coding_index,omitempty"`
	SpeedTPS          *float64 `json:"speed_tps,omitempty"`       // throughput, tokens per second
	LatencyTTFTMs     *float64 `json:"latency_ttft_ms,omitempty"` // time to first token, milliseconds
	// Prices are quoted per 1M tokens, per the JSON field names.
	InputPricePer1M  *float64 `json:"price_input_per_1m,omitempty"`
	OutputPricePer1M *float64 `json:"price_output_per_1m,omitempty"`
	ContextWindow    *int64   `json:"context_window_tokens,omitempty"`
	// Confidence rates how trustworthy this record is — presumably in
	// [0,1]; verify against the producer.
	Confidence float64 `json:"confidence"`
	Source     string  `json:"source"` // which upstream produced this record
}

// Client fetches benchmarks from tokenledger and caches results in memory.
//
// Client contains a mutex: use it via pointer and do not copy it after
// first use.
type Client struct {
	tokenledgerPath string                 // path to the tokenledger binary (unused by the current stub fetcher)
	cache           map[string]*cacheEntry // model ID -> cached benchmark; guarded by cacheMu
	cacheMu         sync.RWMutex           // guards cache
	cacheTTL        time.Duration          // freshness window for cache entries
}

// cacheEntry is one cached benchmark record together with its expiry
// deadline. Entries past expires are stale but retained as a fallback
// when a fresh fetch fails (see GetBenchmark).
type cacheEntry struct {
	data    BenchmarkData // cached copy of the benchmark record
	expires time.Time     // instant after which the entry is stale
}

// NewClient returns a Client that consults the tokenledger binary at
// tokenledgerPath, caching results for one hour.
func NewClient(tokenledgerPath string) *Client {
	c := &Client{
		tokenledgerPath: tokenledgerPath,
		cacheTTL:        time.Hour,
	}
	c.cache = make(map[string]*cacheEntry)
	return c
}

// GetBenchmark returns benchmark data for a model, consulting a TTL
// cache before calling out to tokenledger.
//
// On fetch failure a stale (expired) cache entry is served as a
// graceful fallback; the error is returned only when no cached data
// exists at all.
//
// FIX: cache hits now return a copy of the cached record instead of a
// pointer into the live cache entry, so callers can no longer mutate
// shared cache state through the returned *BenchmarkData.
func (c *Client) GetBenchmark(modelID string) (*BenchmarkData, error) {
	// Fast path: fresh cache hit under the read lock.
	c.cacheMu.RLock()
	if entry, ok := c.cache[modelID]; ok && time.Now().Before(entry.expires) {
		d := entry.data // copy out so callers cannot alias cache state
		c.cacheMu.RUnlock()
		return &d, nil
	}
	c.cacheMu.RUnlock()

	// Fetch fresh data. Concurrent callers for the same model may each
	// fetch; the last writer wins, which is acceptable for benchmarks.
	data, err := c.fetchFromTokenledger(modelID)
	if err != nil {
		// Degrade gracefully: serve stale cached data if any exists.
		c.cacheMu.RLock()
		if entry, ok := c.cache[modelID]; ok {
			d := entry.data
			c.cacheMu.RUnlock()
			return &d, nil
		}
		c.cacheMu.RUnlock()
		return nil, err
	}

	// Cache the fresh result under the write lock.
	c.cacheMu.Lock()
	c.cache[modelID] = &cacheEntry{
		data:    *data,
		expires: time.Now().Add(c.cacheTTL),
	}
	c.cacheMu.Unlock()

	return data, nil
}

// fetchFromTokenledger calls the tokenledger CLI to get benchmark data
// for modelID.
//
// The CLI integration is not yet implemented (it would exec the Rust
// binary at c.tokenledgerPath); until then this always returns an error
// so callers fall back to hardcoded values.
func (c *Client) fetchFromTokenledger(modelID string) (*BenchmarkData, error) {
	// Call tokenledger CLI (would be implemented in Rust binary)
	// For now, return nil to use fallback hardcoded values
	return nil, fmt.Errorf("tokenledger not configured")
}

// GetAllBenchmarks returns all available benchmark data.
//
// Not yet implemented: it always returns an error until the tokenledger
// integration lands, mirroring fetchFromTokenledger.
func (c *Client) GetAllBenchmarks() ([]BenchmarkData, error) {
	// This would call tokenledger to get all benchmarks
	return nil, fmt.Errorf("tokenledger not configured")
}

// RefreshBenchmarks forces a refresh of benchmark data by dropping every
// cached entry; subsequent GetBenchmark calls will re-fetch.
func (c *Client) RefreshBenchmarks() error {
	c.cacheMu.Lock()
	defer c.cacheMu.Unlock()
	c.cache = make(map[string]*cacheEntry)

	// Would trigger tokenledger to fetch fresh data
	return nil
}

// Refresh delegates to RefreshBenchmarks.
//
// FIX: added so *Client satisfies the BenchmarkProvider interface, which
// requires Refresh(); previously MockProvider satisfied the interface
// but Client (with GetBenchmark and GetAllBenchmarks already matching)
// did not.
func (c *Client) Refresh() error {
	return c.RefreshBenchmarks()
}

// GetQualityScore reports the model's intelligence index normalized to
// the [0,1] range, and whether a score was available.
func (c *Client) GetQualityScore(modelID string) (float64, bool) {
	data, err := c.GetBenchmark(modelID)
	if err != nil {
		return 0, false
	}
	if data == nil || data.IntelligenceIndex == nil {
		return 0, false
	}
	// The raw index is on a 0-100 scale; scale down to 0-1.
	return *data.IntelligenceIndex / 100.0, true
}

// GetCost returns the input cost per 1K tokens (USD) for a model, and
// whether pricing data was available.
//
// FIX: InputPricePer1M is quoted per 1M tokens (see the JSON tag
// price_input_per_1m), so it is divided by 1000 to honor this method's
// per-1K contract and match the scale of the hardcoded fallback tables
// (CostPer1kProxy). Previously the per-1M figure was returned raw —
// 1000x too large.
func (c *Client) GetCost(modelID string) (float64, bool) {
	data, err := c.GetBenchmark(modelID)
	if err != nil || data == nil || data.InputPricePer1M == nil {
		return 0, false
	}
	return *data.InputPricePer1M / 1000.0, true
}

// GetLatency returns the model's time-to-first-token latency, truncated
// to whole milliseconds, and whether latency data was available.
func (c *Client) GetLatency(modelID string) (int, bool) {
	data, err := c.GetBenchmark(modelID)
	if err != nil {
		return 0, false
	}
	if data == nil || data.LatencyTTFTMs == nil {
		return 0, false
	}
	ms := int(*data.LatencyTTFTMs) // truncates fractional milliseconds
	return ms, true
}

// BenchmarkProvider defines the interface for benchmark sources
// (e.g. tokenledger-backed clients or test doubles such as MockProvider).
type BenchmarkProvider interface {
	// GetBenchmark returns data for one model, or an error if unavailable.
	GetBenchmark(modelID string) (*BenchmarkData, error)
	// GetAllBenchmarks returns every known model's data.
	GetAllBenchmarks() ([]BenchmarkData, error)
	// Refresh forces the source to re-fetch its data.
	Refresh() error
}

// MockProvider is a stateless BenchmarkProvider stand-in whose lookup
// methods are intentionally unimplemented, which steers callers onto
// the hardcoded fallback data.
type MockProvider struct{}

// NewMockProvider creates a provider with fallback data.
func NewMockProvider() *MockProvider {
	var p MockProvider
	return &p
}

// GetBenchmark implements BenchmarkProvider. It always fails; callers
// are expected to fall back to hardcoded values.
func (p *MockProvider) GetBenchmark(modelID string) (*BenchmarkData, error) {
	return nil, fmt.Errorf("not implemented")
}

// GetAllBenchmarks implements BenchmarkProvider. It always fails;
// callers are expected to fall back to hardcoded values.
func (p *MockProvider) GetAllBenchmarks() ([]BenchmarkData, error) {
	return nil, fmt.Errorf("not implemented")
}

// Refresh implements BenchmarkProvider. It is a no-op for the mock and
// always succeeds.
func (p *MockProvider) Refresh() error {
	return nil
}

// MarshalJSON implements json.Marshaler for BenchmarkData.
//
// The local Alias type has BenchmarkData's fields but not its method
// set, which stops json.Marshal from re-invoking this method and
// recursing infinitely. No fields are added or transformed, so the
// output currently matches default struct marshaling — NOTE(review):
// this method exists only as an extension point; confirm it is needed.
func (b *BenchmarkData) MarshalJSON() ([]byte, error) {
	type Alias BenchmarkData
	return json.Marshal(&struct {
		*Alias
	}{
		Alias: (*Alias)(b),
	})
}
182 changes: 182 additions & 0 deletions pkg/llmproxy/benchmarks/unified.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
// Package benchmarks provides unified benchmark access with fallback to hardcoded values.
// This integrates with tokenledger for dynamic data while maintaining backward compatibility.
package benchmarks

import (
"fmt"
"sync"
)

// UnifiedBenchmarkStore combines dynamic tokenledger data with hardcoded
// fallbacks: each getter tries the primary provider first, then falls
// back to the static tables when the primary is absent or lacks data.
type UnifiedBenchmarkStore struct {
	primary   BenchmarkProvider // optional dynamic source; may be nil
	fallbacks *FallbackProvider // static tables; set by both constructors
	// mu is currently unused — no method in this file locks it, and both
	// fields are written only at construction. NOTE(review): remove it,
	// or use it if providers ever become swappable at runtime.
	mu sync.RWMutex
}

// FallbackProvider provides hardcoded benchmark values keyed by model ID.
type FallbackProvider struct {
	// QualityProxy maps known model IDs to their quality scores in [0,1].
	QualityProxy map[string]float64
	// CostPer1kProxy maps model IDs to estimated cost per 1k tokens (USD).
	CostPer1kProxy map[string]float64
	// LatencyMsProxy maps model IDs to estimated p50 latency in milliseconds.
	LatencyMsProxy map[string]int
}

// DefaultFallbackProvider returns the hardcoded maps from pareto_router.go.
//
// All values are static, hand-maintained estimates: quality in [0,1],
// cost in USD per 1k tokens, latency in milliseconds. NOTE(review):
// keep the three maps' key sets in sync when adding or removing models.
func DefaultFallbackProvider() *FallbackProvider {
	return &FallbackProvider{
		// Quality scores, normalized to [0,1].
		QualityProxy: map[string]float64{
			"claude-opus-4.6":               0.95,
			"claude-opus-4.6-1m":            0.96,
			"claude-sonnet-4.6":             0.88,
			"claude-haiku-4.5":              0.75,
			"gpt-5.3-codex-high":            0.92,
			"gpt-5.3-codex":                 0.82,
			"claude-4.5-opus-high-thinking": 0.94,
			"claude-4.5-opus-high":          0.92,
			"claude-4.5-sonnet-thinking":    0.85,
			"claude-4-sonnet":               0.80,
			"gpt-4o":                        0.85,
			"gpt-5.1-codex":                 0.80,
			"gemini-3-flash":                0.78,
			"gemini-3.1-pro":                0.90,
			"gemini-2.5-flash":              0.76,
			"gemini-2.0-flash":              0.72,
			"glm-5":                         0.78,
			"minimax-m2.5":                  0.75,
			"deepseek-v3.2":                 0.80,
			"composer-1.5":                  0.82,
			"composer-1":                    0.78,
			"roo-default":                   0.70,
			"kilo-default":                  0.70,
		},
		// Estimated cost in USD per 1k tokens (e.g. 0.015 == $15 per 1M).
		CostPer1kProxy: map[string]float64{
			"claude-opus-4.6":               0.015,
			"claude-opus-4.6-1m":            0.015,
			"claude-sonnet-4.6":             0.003,
			"claude-haiku-4.5":              0.00025,
			"gpt-5.3-codex-high":            0.020,
			"gpt-5.3-codex":                 0.010,
			"claude-4.5-opus-high-thinking": 0.025,
			"claude-4.5-opus-high":          0.015,
			"claude-4.5-sonnet-thinking":    0.005,
			"claude-4-sonnet":               0.003,
			"gpt-4o":                        0.005,
			"gpt-5.1-codex":                 0.008,
			"gemini-3-flash":                0.00015,
			"gemini-2.5-flash":              0.0001,
			"gemini-3.1-pro":                0.007,
			"gemini-2.0-flash":              0.0001,
			"glm-5":                         0.001,
			"minimax-m2.5":                  0.001,
			"deepseek-v3.2":                 0.0005,
			"composer-1.5":                  0.002,
			"composer-1":                    0.001,
			"roo-default":                   0.0, // self-hosted defaults carry no metered cost
			"kilo-default":                  0.0,
		},
		// Estimated p50 latency, milliseconds.
		LatencyMsProxy: map[string]int{
			"claude-opus-4.6":               4000,
			"claude-opus-4.6-1m":            5000,
			"claude-sonnet-4.6":             2000,
			"claude-haiku-4.5":              800,
			"gpt-5.3-codex-high":            6000,
			"gpt-5.3-codex":                 3000,
			"claude-4.5-opus-high-thinking": 8000,
			"claude-4.5-opus-high":          5000,
			"claude-4.5-sonnet-thinking":    4000,
			"claude-4-sonnet":               2500,
			"gpt-4o":                        2000,
			"gpt-5.1-codex":                 3000,
			"gemini-3-flash":                600,
			"gemini-3.1-pro":                3000,
			"gemini-2.5-flash":              500,
			"gemini-2.0-flash":              400,
			"glm-5":                         1500,
			"minimax-m2.5":                  1200,
			"deepseek-v3.2":                 1000,
			"composer-1.5":                  2000,
			"composer-1":                    1500,
			"roo-default":                   1000,
			"kilo-default":                  1000,
		},
	}
}

// NewUnifiedStore creates a store that consults primary first and falls
// back to the default hardcoded tables.
func NewUnifiedStore(primary BenchmarkProvider) *UnifiedBenchmarkStore {
	s := &UnifiedBenchmarkStore{primary: primary}
	s.fallbacks = DefaultFallbackProvider()
	return s
}

// NewFallbackOnlyStore creates a store backed solely by the hardcoded
// fallback tables (no primary provider).
func NewFallbackOnlyStore() *UnifiedBenchmarkStore {
	// A nil primary makes every getter skip straight to the fallbacks.
	return NewUnifiedStore(nil)
}

// GetQuality returns the quality score in [0,1] for modelID and whether
// one was found, preferring the primary (tokenledger) provider over the
// hardcoded fallback table.
func (s *UnifiedBenchmarkStore) GetQuality(modelID string) (float64, bool) {
	if s.primary != nil {
		data, err := s.primary.GetBenchmark(modelID)
		if err == nil && data != nil && data.IntelligenceIndex != nil {
			// The primary reports a 0-100 index; normalize to [0,1].
			return *data.IntelligenceIndex / 100.0, true
		}
	}

	q, ok := s.fallbacks.QualityProxy[modelID]
	return q, ok
}

// GetCost returns the cost per 1K tokens (USD) for modelID and whether
// pricing was found, trying the primary provider before the hardcoded
// fallback table.
//
// FIX: the primary's InputPricePer1M is quoted per 1M tokens (JSON tag
// price_input_per_1m), while this method's contract and the
// CostPer1kProxy fallback are per 1K — divide by 1000 so both paths
// report the same unit. Previously the primary path was 1000x too large
// relative to the fallback path.
func (s *UnifiedBenchmarkStore) GetCost(modelID string) (float64, bool) {
	if s.primary != nil {
		if data, err := s.primary.GetBenchmark(modelID); err == nil && data != nil && data.InputPricePer1M != nil {
			return *data.InputPricePer1M / 1000.0, true
		}
	}

	if c, ok := s.fallbacks.CostPer1kProxy[modelID]; ok {
		return c, true
	}
	return 0, false
}

// GetLatency returns the latency in milliseconds for modelID and whether
// a figure was found, preferring primary data over the hardcoded
// fallback table.
func (s *UnifiedBenchmarkStore) GetLatency(modelID string) (int, bool) {
	if s.primary != nil {
		data, err := s.primary.GetBenchmark(modelID)
		if err == nil && data != nil && data.LatencyTTFTMs != nil {
			// Truncate fractional milliseconds.
			return int(*data.LatencyTTFTMs), true
		}
	}

	l, ok := s.fallbacks.LatencyMsProxy[modelID]
	return l, ok
}

// GetAll returns all benchmark data from the primary provider. The
// hardcoded fallback tables are not enumerated, so it fails when no
// primary is configured.
func (s *UnifiedBenchmarkStore) GetAll() ([]BenchmarkData, error) {
	if s.primary != nil {
		return s.primary.GetAllBenchmarks()
	}
	return nil, fmt.Errorf("no primary provider configured")
}

// Refresh triggers a refresh of benchmark data on the primary provider.
// With no primary configured it is a no-op and succeeds.
func (s *UnifiedBenchmarkStore) Refresh() error {
	if s.primary == nil {
		return nil
	}
	return s.primary.Refresh()
}
Loading
Loading