Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 22 additions & 5 deletions src/analyzers/claude_code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,8 @@ pub enum ContentBlock {
},
ToolResult {
tool_use_id: String, // e.g. "toolu_01K7hbuwktKtti8mQb1wH2q8"
content: Content, // e.g. "Found 4 files\nC:\\..."
#[serde(default)]
content: Option<Content>, // e.g. "Found 4 files\nC:\\..." — absent for empty results
},
Text {
text: serde_bytes::ByteBuf,
Expand All @@ -309,6 +310,9 @@ pub enum ContentBlock {
Image {
source: ImageSource,
},
// Catch-all for unknown/new content block types (e.g. tool_reference, redacted_thinking)
#[serde(other)]
Other,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
Expand All @@ -324,15 +328,25 @@ pub enum ImageSource {
Base64 { media_type: String, data: String },
}

/// Deserializes a JSON value as `u64`, coercing `null` to 0.
///
/// Some providers (e.g. OpenRouter) emit `null` for token counts rather than
/// omitting the field entirely; `#[serde(default)]` only covers the missing
/// case, so this helper handles the explicit-`null` case as well.
fn deserialize_u64_or_null<'de, D>(deserializer: D) -> Result<u64, D::Error>
where
    D: serde::Deserializer<'de>,
{
    // First parse as an optional count, then collapse the `None` (null) case.
    let maybe_count = Option::<u64>::deserialize(deserializer)?;
    Ok(maybe_count.unwrap_or_default())
}

/// Per-turn token usage as reported in the session log.
///
/// Every field defaults to 0 when absent, and explicit `null` values (sent by
/// some providers, e.g. OpenRouter) are also coerced to 0 via
/// `deserialize_u64_or_null`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Usage {
    #[serde(default, deserialize_with = "deserialize_u64_or_null")]
    pub input_tokens: u64,
    #[serde(default, deserialize_with = "deserialize_u64_or_null")]
    pub output_tokens: u64,
    #[serde(default, deserialize_with = "deserialize_u64_or_null")]
    pub cache_creation_input_tokens: u64,
    #[serde(default, deserialize_with = "deserialize_u64_or_null")]
    pub cache_read_input_tokens: u64,
}

Expand Down Expand Up @@ -411,6 +425,9 @@ enum ClaudeCodeEntry {
QueueOperation(ClaudeCodeQueueOperationEntry),
#[serde(rename = "progress")]
Progress(ClaudeCodeProgressEntry),
// Catch-all for unknown/new entry types (e.g. last-prompt)
#[serde(other)]
Other,
}

pub mod tool_schema {
Expand Down
70 changes: 35 additions & 35 deletions src/analyzers/codex_cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -392,45 +392,45 @@ pub(crate) fn parse_codex_cli_jsonl_file(
session_name: effective_name,
});
}
"assistant" => {
"assistant"
// Token usage is now emitted immediately when processing token_count
// events. We still track assistant messages without additional stats
// to avoid double-counting when Codex emits separate reasoning/tool
// outputs.
if !saw_token_usage {
let model_state = session_model.clone().unwrap_or_else(|| {
let fallback = SessionModel::inferred(
DEFAULT_FALLBACK_MODEL.to_string(),
);
warn_once(format!(
"WARNING: session {file_path_str} missing model metadata; using fallback model {} for cost estimation.",
fallback.name
));
session_model = Some(fallback.clone());
fallback
});

entries.push(ConversationMessage {
application: Application::CodexCli,
model: Some(model_state.name.clone()),
global_hash: hash_text(&format!(
"{}_{}_assistant_{}",
file_path_str,
wrapper.timestamp.to_rfc3339(),
entries.len()
)),
local_hash: None,
conversation_hash: hash_text(&file_path_str),
date: wrapper.timestamp,
project_hash: "".to_string(),
stats: Stats::default(),
role: MessageRole::Assistant,
uuid: None,
session_name: session_name
.clone()
.or_else(|| fallback_session_name.clone()),
});
}
if !saw_token_usage =>
{
let model_state = session_model.clone().unwrap_or_else(|| {
let fallback = SessionModel::inferred(
DEFAULT_FALLBACK_MODEL.to_string(),
);
warn_once(format!(
"WARNING: session {file_path_str} missing model metadata; using fallback model {} for cost estimation.",
fallback.name
));
session_model = Some(fallback.clone());
fallback
});

entries.push(ConversationMessage {
application: Application::CodexCli,
model: Some(model_state.name.clone()),
global_hash: hash_text(&format!(
"{}_{}_assistant_{}",
file_path_str,
wrapper.timestamp.to_rfc3339(),
entries.len()
)),
local_hash: None,
conversation_hash: hash_text(&file_path_str),
date: wrapper.timestamp,
project_hash: "".to_string(),
stats: Stats::default(),
role: MessageRole::Assistant,
uuid: None,
session_name: session_name
.clone()
.or_else(|| fallback_session_name.clone()),
});
}
_ => {}
}
Expand Down
11 changes: 5 additions & 6 deletions src/analyzers/copilot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,15 +149,14 @@ fn count_tokens(text: &str) -> u64 {
// Recursively extract all text content from a nested JSON structure
fn extract_text_from_value(value: &simd_json::OwnedValue, accumulated_text: &mut String) {
match value {
simd_json::OwnedValue::String(s) => {
simd_json::OwnedValue::String(s)
// Only accumulate if it's a "text" field value, not metadata like URIs
if !s.starts_with("vscode-")
&& !s.starts_with("file://")
&& !s.starts_with("ssh-remote")
{
accumulated_text.push_str(s);
accumulated_text.push(' ');
}
&& !s.starts_with("ssh-remote") =>
{
accumulated_text.push_str(s);
accumulated_text.push(' ');
}
simd_json::OwnedValue::Object(obj) => {
// Look for "text" fields specifically
Expand Down
184 changes: 177 additions & 7 deletions src/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,35 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! {
caching: CachingSupport::None,
is_estimated: false,
},
"gpt-5.4" => ModelInfo {
pricing: PricingStructure::Tiered {
tiers: &[
PricingTier {
max_tokens: Some(272_000),
input_per_1m: 2.50,
output_per_1m: 15.0,
},
PricingTier {
max_tokens: None,
input_per_1m: 5.0,
output_per_1m: 22.5,
},
],
},
caching: CachingSupport::Google {
tiers: &[
CachingTier {
max_tokens: Some(272_000),
cached_input_per_1m: 0.25,
},
CachingTier {
max_tokens: None,
cached_input_per_1m: 0.50,
},
],
},
is_estimated: false,
},
Comment thread
coderabbitai[bot] marked this conversation as resolved.

// Anthropic Models
"claude-opus-4-6" => ModelInfo {
Expand Down Expand Up @@ -780,6 +809,78 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! {
caching: CachingSupport::None,
is_estimated: false,
},

// Z.AI (Zhipu AI) - Additional Models
"glm-5" => ModelInfo {
pricing: PricingStructure::Flat {
input_per_1m: 1.0,
output_per_1m: 3.2,
},
caching: CachingSupport::OpenAI {
cached_input_per_1m: 0.2,
},
is_estimated: false,
},
"glm-5-code" => ModelInfo {
pricing: PricingStructure::Flat {
input_per_1m: 1.2,
output_per_1m: 5.0,
},
caching: CachingSupport::OpenAI {
cached_input_per_1m: 0.3,
},
is_estimated: false,
},
"glm-4.5-air" => ModelInfo {
pricing: PricingStructure::Flat {
input_per_1m: 0.2,
output_per_1m: 1.1,
},
caching: CachingSupport::OpenAI {
cached_input_per_1m: 0.03,
},
is_estimated: false,
},

// MiniMax Models
"minimax-m2.5" => ModelInfo {
pricing: PricingStructure::Flat {
input_per_1m: 0.30,
output_per_1m: 1.10,
},
caching: CachingSupport::None,
is_estimated: false,
},

// StepFun Models
"step-3.5-flash" => ModelInfo {
pricing: PricingStructure::Flat {
input_per_1m: 0.10,
output_per_1m: 0.30,
},
caching: CachingSupport::None,
is_estimated: false,
},

// Upstage Models
"solar-pro-3" => ModelInfo {
pricing: PricingStructure::Flat {
input_per_1m: 0.15,
output_per_1m: 0.60,
},
caching: CachingSupport::None,
is_estimated: false,
},

// OpenRouter Models
"aurora-alpha" => ModelInfo {
pricing: PricingStructure::Flat {
input_per_1m: 0.0,
output_per_1m: 0.0,
},
caching: CachingSupport::None,
is_estimated: false,
},
};

static MODEL_ALIASES: phf::Map<&'static str, &'static str> = phf_map! {
Expand Down Expand Up @@ -912,19 +1013,88 @@ static MODEL_ALIASES: phf::Map<&'static str, &'static str> = phf_map! {

// Zhipu AI aliases
"zai-glm-4.6" => "glm-4.6",
"glm-5-20260211" => "glm-5",
"glm-5-code" => "glm-5-code",
"glm-5-code-20260211" => "glm-5-code",
"glm-4.5-air-20260211" => "glm-4.5-air",

// OpenAI aliases (continued)
"gpt-5.4" => "gpt-5.4",
"gpt-5.4-2026-03-05" => "gpt-5.4",

// MiniMax aliases
"minimax-m2.5" => "minimax-m2.5",
"minimax-m2.5-20260211" => "minimax-m2.5",

// StepFun aliases
"step-3.5-flash" => "step-3.5-flash",

// Upstage aliases
"solar-pro-3" => "solar-pro-3",

// Aurora aliases
"aurora-alpha" => "aurora-alpha",
};
Comment thread
coderabbitai[bot] marked this conversation as resolved.

/// Get model info by any valid name (canonical or alias)
pub fn get_model_info(model_name: &str) -> Option<&ModelInfo> {
// First try direct lookup in model index
if let Some(model_info) = MODEL_INDEX.get(model_name) {
/// Free-tier model pricing for models accessed via OpenRouter's `:free` suffix
/// or other free-tier naming patterns.
///
/// Returned by `get_model_info` for any name matching a free-tier pattern,
/// even when the base model is unknown to `MODEL_INDEX`.
static FREE_MODEL_INFO: ModelInfo = ModelInfo {
    pricing: PricingStructure::Flat {
        input_per_1m: 0.0,
        output_per_1m: 0.0,
    },
    caching: CachingSupport::None,
    // $0 is the actual free-tier price, not a guess, so this is not estimated.
    is_estimated: false,
};

/// Look up a model name directly in the index and alias tables.
fn lookup_model(name: &str) -> Option<&'static ModelInfo> {
if let Some(model_info) = MODEL_INDEX.get(name) {
return Some(model_info);
}

// Then try alias lookup
if let Some(&canonical_name) = MODEL_ALIASES.get(model_name) {
if let Some(&canonical_name) = MODEL_ALIASES.get(name) {
return MODEL_INDEX.get(canonical_name);
}
None
}

/// Get model info by any valid name (canonical or alias).
///
/// Handles provider-prefixed model names (e.g. `minimax/minimax-m2.5`,
/// `z-ai/glm-5`, `openrouter/aurora-alpha`) by stripping the prefix before
/// lookup. Models with a `:free` suffix (OpenRouter free tier) always
/// return $0 pricing. Returns `None` for names that match no pattern.
pub fn get_model_info(model_name: &str) -> Option<&'static ModelInfo> {
    // Fast path: direct lookup of the name exactly as given.
    if let Some(info) = lookup_model(model_name) {
        return Some(info);
    }

    // Normalize: strip provider prefix (everything before the last `/`).
    let after_slash = model_name
        .rsplit_once('/')
        .map(|(_, name)| name)
        .unwrap_or(model_name);

    // `:free` suffix (OpenRouter free tier) → always $0,
    // even when the base model itself is unknown.
    if after_slash.ends_with(":free") {
        return Some(&FREE_MODEL_INFO);
    }

    // Handle other suffixes like `:extended`.
    let base_name = after_slash.strip_suffix(":extended").unwrap_or(after_slash);

    // Try the normalized name (only if it differs from the original,
    // which we already looked up above).
    if base_name != model_name {
        if let Some(info) = lookup_model(base_name) {
            return Some(info);
        }
    }

    // Also handle patterns like "minimax-m2.5-free" (without colon).
    if base_name.ends_with("-free") {
        return Some(&FREE_MODEL_INFO);
    }

    None
}
Expand Down
Loading