diff --git a/Cargo.lock b/Cargo.lock index 917f78c..4691df1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -160,26 +160,6 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" -[[package]] -name = "bincode" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740" -dependencies = [ - "bincode_derive", - "serde", - "unty", -] - -[[package]] -name = "bincode_derive" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09" -dependencies = [ - "virtue", -] - [[package]] name = "bit-set" version = "0.5.3" @@ -1968,9 +1948,9 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.13" +version = "0.11.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" +checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" dependencies = [ "aws-lc-rs", "bytes", @@ -2237,9 +2217,9 @@ checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "reqwest" -version = "0.13.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04e9018c9d814e5f30cc16a0f03271aeab3571e609612d9fe78c1aa8d11c2f62" +checksum = "ab3f43e3283ab1488b624b44b0e988d0acea0b3214e694730a055cb6b2efa801" dependencies = [ "base64", "bytes", @@ -2709,7 +2689,6 @@ version = "3.3.3" dependencies = [ "anyhow", "async-trait", - "bincode", "c2rust-bitfields", "chrono", "chrono-tz", @@ -3243,12 +3222,6 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" -[[package]] -name = "unty" -version = "0.0.4" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae" - [[package]] name = "url" version = "2.5.8" @@ -3309,12 +3282,6 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" -[[package]] -name = "virtue" -version = "0.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" - [[package]] name = "vtparse" version = "0.6.2" diff --git a/Cargo.toml b/Cargo.toml index 6f5adf1..7117e98 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,7 +41,6 @@ tiktoken-rs = "0.9.1" parking_lot = "0.12" tinyvec = { version = "1.8", features = ["alloc"] } c2rust-bitfields = "0.18" -bincode = "2.0.1" dirs = "6.0" chrono-tz = "0.10" rusqlite = { version = "0.38.0", features = ["bundled"] } @@ -56,7 +55,7 @@ version = "4.5.53" features = ["derive"] [dependencies.reqwest] -version = "0.13.1" +version = "0.13.2" default-features = false features = ["rustls"] diff --git a/src/analyzers/codex_cli.rs b/src/analyzers/codex_cli.rs index ee6c704..060f50c 100644 --- a/src/analyzers/codex_cli.rs +++ b/src/analyzers/codex_cli.rs @@ -392,13 +392,11 @@ pub(crate) fn parse_codex_cli_jsonl_file( session_name: effective_name, }); } - "assistant" - // Token usage is now emitted immediately when processing token_count - // events. We still track assistant messages without additional stats - // to avoid double-counting when Codex emits separate reasoning/tool - // outputs. - if !saw_token_usage => - { + // Token usage is now emitted immediately when processing token_count + // events. We still track assistant messages without additional stats + // to avoid double-counting when Codex emits separate reasoning/tool + // outputs. 
+ "assistant" if !saw_token_usage => { let model_state = session_model.clone().unwrap_or_else(|| { let fallback = SessionModel::inferred( DEFAULT_FALLBACK_MODEL.to_string(), diff --git a/src/analyzers/copilot.rs b/src/analyzers/copilot.rs index 7a8b88a..40e94a0 100644 --- a/src/analyzers/copilot.rs +++ b/src/analyzers/copilot.rs @@ -149,8 +149,8 @@ fn count_tokens(text: &str) -> u64 { // Recursively extract all text content from a nested JSON structure fn extract_text_from_value(value: &simd_json::OwnedValue, accumulated_text: &mut String) { match value { + // Only accumulate if it's a "text" field value, not metadata like URIs simd_json::OwnedValue::String(s) - // Only accumulate if it's a "text" field value, not metadata like URIs if !s.starts_with("vscode-") && !s.starts_with("file://") && !s.starts_with("ssh-remote") => diff --git a/src/models.rs b/src/models.rs index 06de394..eb90ba9 100644 --- a/src/models.rs +++ b/src/models.rs @@ -14,6 +14,14 @@ pub struct PricingTier { pub output_per_1m: f64, } +#[derive(Debug, Clone)] +pub struct TieredPricing { + /// Pricing tiers ordered from lowest threshold to highest. + pub tiers: &'static [PricingTier], + /// If true, bill the entire token count at the single matching tier's rate. + pub bracket_pricing: bool, +} + /// Different pricing structures supported by various model providers #[derive(Debug, Clone)] pub enum PricingStructure { @@ -23,7 +31,7 @@ pub enum PricingStructure { output_per_1m: f64, }, /// Tiered pricing (different costs based on token thresholds) - Tiered { tiers: &'static [PricingTier] }, + Tiered(TieredPricing), } /// Caching tier for models with tiered cache pricing @@ -35,6 +43,14 @@ pub struct CachingTier { pub cached_input_per_1m: f64, } +#[derive(Debug, Clone)] +pub struct TieredCaching { + /// Cache tiers ordered from lowest threshold to highest. + pub tiers: &'static [CachingTier], + /// If true, bill the entire token count at the single matching tier's rate. 
+ pub bracket_pricing: bool, +} + /// Different caching support models #[derive(Debug, Clone)] pub enum CachingSupport { @@ -48,7 +64,7 @@ pub enum CachingSupport { cache_read_per_1m: f64, }, /// Google-style caching (may have tiers like input/output) - Google { tiers: &'static [CachingTier] }, + Google(TieredCaching), } /// Complete model information with all pricing details @@ -266,7 +282,7 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! { }, is_estimated: false, }, - // GPT-5.1 Codex models (estimated pricing - API not yet published) + // GPT-5.1 Codex models "gpt-5.1-codex" => ModelInfo { pricing: PricingStructure::Flat { input_per_1m: 1.25, @@ -325,7 +341,7 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! { }, is_estimated: false, }, - // GPT-5.3 Codex (estimated pricing - API not yet published) + // GPT-5.3 Codex "gpt-5.3-codex" => ModelInfo { pricing: PricingStructure::Flat { input_per_1m: 1.75, @@ -334,7 +350,7 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! { caching: CachingSupport::OpenAI { cached_input_per_1m: 0.175, }, - is_estimated: true, + is_estimated: false, }, "gpt-5-pro" => ModelInfo { pricing: PricingStructure::Flat { @@ -345,7 +361,7 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! { is_estimated: false, }, "gpt-5.4" => ModelInfo { - pricing: PricingStructure::Tiered { + pricing: PricingStructure::Tiered(TieredPricing { tiers: &[ PricingTier { max_tokens: Some(272_000), @@ -358,8 +374,9 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! { output_per_1m: 22.5, }, ], - }, - caching: CachingSupport::Google { + bracket_pricing: false, + }), + caching: CachingSupport::Google(TieredCaching { tiers: &[ CachingTier { max_tokens: Some(272_000), @@ -370,7 +387,8 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! 
{ cached_input_per_1m: 0.50, }, ], - }, + bracket_pricing: false, + }), is_estimated: false, }, @@ -525,18 +543,50 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! { input_per_1m: 0.5, output_per_1m: 3.0, }, - caching: CachingSupport::Google { + caching: CachingSupport::Google(TieredCaching { tiers: &[ CachingTier { max_tokens: None, cached_input_per_1m: 0.05, }, ], - }, + bracket_pricing: false, + }), + is_estimated: false, + }, + "gemini-3.1-pro-preview" => ModelInfo { + pricing: PricingStructure::Tiered(TieredPricing { + tiers: &[ + PricingTier { + max_tokens: Some(200_000), + input_per_1m: 2.0, + output_per_1m: 12.0, + }, + PricingTier { + max_tokens: None, + input_per_1m: 4.0, + output_per_1m: 18.0, + }, + ], + bracket_pricing: true, + }), + caching: CachingSupport::Google(TieredCaching { + tiers: &[ + CachingTier { + max_tokens: Some(200_000), + cached_input_per_1m: 0.20, + }, + CachingTier { + max_tokens: None, + cached_input_per_1m: 0.40, + }, + ], + bracket_pricing: true, + }), is_estimated: false, }, "gemini-3-pro-preview-11-2025" => ModelInfo { - pricing: PricingStructure::Tiered { + pricing: PricingStructure::Tiered(TieredPricing { tiers: &[ PricingTier { max_tokens: Some(200_000), @@ -549,12 +599,13 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! { output_per_1m: 18.0, }, ], - }, + bracket_pricing: false, + }), caching: CachingSupport::None, is_estimated: false, }, "gemini-2.5-pro" => ModelInfo { - pricing: PricingStructure::Tiered { + pricing: PricingStructure::Tiered(TieredPricing { tiers: &[ PricingTier { max_tokens: Some(200_000), @@ -567,8 +618,9 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! { output_per_1m: 15.0, }, ], - }, - caching: CachingSupport::Google { + bracket_pricing: false, + }), + caching: CachingSupport::Google(TieredCaching { tiers: &[ CachingTier { max_tokens: Some(200_000), @@ -579,7 +631,8 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! 
{ cached_input_per_1m: 0.625, }, ], - }, + bracket_pricing: false, + }), is_estimated: false, }, "gemini-2.5-flash" => ModelInfo { @@ -587,14 +640,15 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! { input_per_1m: 0.3, output_per_1m: 2.5, }, - caching: CachingSupport::Google { + caching: CachingSupport::Google(TieredCaching { tiers: &[ CachingTier { max_tokens: None, cached_input_per_1m: 0.075, }, ], - }, + bracket_pricing: false, + }), is_estimated: false, }, "gemini-2.5-flash-lite" => ModelInfo { @@ -602,14 +656,15 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! { input_per_1m: 0.1, output_per_1m: 0.4, }, - caching: CachingSupport::Google { + caching: CachingSupport::Google(TieredCaching { tiers: &[ CachingTier { max_tokens: None, cached_input_per_1m: 0.025, }, ], - }, + bracket_pricing: false, + }), is_estimated: false, }, "gemini-2.0-pro-exp-02-05" => ModelInfo { @@ -617,14 +672,15 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! { input_per_1m: 0.0, output_per_1m: 0.0, }, - caching: CachingSupport::Google { + caching: CachingSupport::Google(TieredCaching { tiers: &[ CachingTier { max_tokens: None, cached_input_per_1m: 0.0, }, ], - }, + bracket_pricing: false, + }), is_estimated: false, }, "gemini-2.0-flash" => ModelInfo { @@ -632,14 +688,15 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! { input_per_1m: 0.1, output_per_1m: 0.4, }, - caching: CachingSupport::Google { + caching: CachingSupport::Google(TieredCaching { tiers: &[ CachingTier { max_tokens: None, cached_input_per_1m: 0.025, }, ], - }, + bracket_pricing: false, + }), is_estimated: false, }, "gemini-2.0-flash-lite" => ModelInfo { @@ -651,7 +708,7 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! 
{ is_estimated: false, }, "gemini-1.5-flash" => ModelInfo { - pricing: PricingStructure::Tiered { + pricing: PricingStructure::Tiered(TieredPricing { tiers: &[ PricingTier { max_tokens: Some(128_000), @@ -664,8 +721,9 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! { output_per_1m: 0.6, }, ], - }, - caching: CachingSupport::Google { + bracket_pricing: false, + }), + caching: CachingSupport::Google(TieredCaching { tiers: &[ CachingTier { max_tokens: Some(128_000), @@ -676,11 +734,12 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! { cached_input_per_1m: 0.0375, }, ], - }, + bracket_pricing: false, + }), is_estimated: false, }, "gemini-1.5-flash-8b" => ModelInfo { - pricing: PricingStructure::Tiered { + pricing: PricingStructure::Tiered(TieredPricing { tiers: &[ PricingTier { max_tokens: Some(128_000), @@ -693,8 +752,9 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! { output_per_1m: 0.3, }, ], - }, - caching: CachingSupport::Google { + bracket_pricing: false, + }), + caching: CachingSupport::Google(TieredCaching { tiers: &[ CachingTier { max_tokens: Some(128_000), @@ -705,11 +765,12 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! { cached_input_per_1m: 0.02, }, ], - }, + bracket_pricing: false, + }), is_estimated: false, }, "gemini-1.5-pro" => ModelInfo { - pricing: PricingStructure::Tiered { + pricing: PricingStructure::Tiered(TieredPricing { tiers: &[ PricingTier { max_tokens: Some(128_000), @@ -722,8 +783,9 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! { output_per_1m: 10.0, }, ], - }, - caching: CachingSupport::Google { + bracket_pricing: false, + }), + caching: CachingSupport::Google(TieredCaching { tiers: &[ CachingTier { max_tokens: Some(128_000), @@ -734,7 +796,8 @@ static MODEL_INDEX: phf::Map<&'static str, ModelInfo> = phf_map! 
{ cached_input_per_1m: 0.625, }, ], - }, + bracket_pricing: false, + }), is_estimated: false, }, @@ -976,6 +1039,8 @@ static MODEL_ALIASES: phf::Map<&'static str, &'static str> = phf_map! { "gemini-3-flash-preview" => "gemini-3-flash-preview", "gemini-3-flash-preview-12-2025" => "gemini-3-flash-preview", "gemini-3-flash" => "gemini-3-flash-preview", + "gemini-3.1-pro-preview" => "gemini-3.1-pro-preview", + "gemini-3.1-pro" => "gemini-3.1-pro-preview", "gemini-3-pro-preview-11-2025" => "gemini-3-pro-preview-11-2025", "gemini-3-pro-preview" => "gemini-3-pro-preview-11-2025", "gemini-3-pro" => "gemini-3-pro-preview-11-2025", @@ -1113,7 +1178,9 @@ pub fn calculate_input_cost(model_name: &str, input_tokens: u64) -> f64 { PricingStructure::Flat { input_per_1m, .. } => { (input_tokens as f64 / 1_000_000.0) * input_per_1m } - PricingStructure::Tiered { tiers } => calculate_tiered_cost(input_tokens, tiers, true), + PricingStructure::Tiered(tiered) => { + calculate_tiered_cost(input_tokens, tiered.tiers, tiered.bracket_pricing, true) + } }, None => { warn_once(format!( @@ -1131,8 +1198,8 @@ pub fn calculate_output_cost(model_name: &str, output_tokens: u64) -> f64 { PricingStructure::Flat { output_per_1m, .. 
} => { (output_tokens as f64 / 1_000_000.0) * output_per_1m } - PricingStructure::Tiered { tiers } => { - calculate_tiered_cost(output_tokens, tiers, false) + PricingStructure::Tiered(tiered) => { + calculate_tiered_cost(output_tokens, tiered.tiers, tiered.bracket_pricing, false) } }, None => { @@ -1169,9 +1236,13 @@ pub fn calculate_cache_cost( let read_cost = (cache_read_tokens as f64 / 1_000_000.0) * cache_read_per_1m; creation_cost + read_cost } - CachingSupport::Google { tiers } => { + CachingSupport::Google(tiered) => { // Google only has read cost, calculate based on tiers - calculate_tiered_cache_cost(cache_read_tokens, tiers) + calculate_tiered_cache_cost( + cache_read_tokens, + tiered.tiers, + tiered.bracket_pricing, + ) } } } @@ -1199,17 +1270,38 @@ pub fn calculate_total_cost( input_cost + output_cost + cache_cost } -fn calculate_tiered_cost(tokens: u64, tiers: &[PricingTier], is_input: bool) -> f64 { +fn calculate_tiered_cost( + tokens: u64, + tiers: &[PricingTier], + bracket_pricing: bool, + is_input: bool, +) -> f64 { + if bracket_pricing { + if let Some(tier) = find_tier(tokens, tiers, |tier| tier.max_tokens) { + let rate = if is_input { + tier.input_per_1m + } else { + tier.output_per_1m + }; + + return (tokens as f64 / 1_000_000.0) * rate; + } + + return 0.0; + } + let mut total_cost = 0.0; let mut remaining_tokens = tokens; + let mut lower_bound = 0; for tier in tiers { if remaining_tokens == 0 { break; } - let tier_limit = tier.max_tokens.unwrap_or(u64::MAX); - let tokens_in_tier = remaining_tokens.min(tier_limit); + let upper_bound = tier.max_tokens.unwrap_or(u64::MAX); + let tier_width = upper_bound.saturating_sub(lower_bound); + let tokens_in_tier = remaining_tokens.min(tier_width); let rate = if is_input { tier.input_per_1m @@ -1219,27 +1311,81 @@ fn calculate_tiered_cost(tokens: u64, tiers: &[PricingTier], is_input: bool) -> total_cost += (tokens_in_tier as f64 / 1_000_000.0) * rate; remaining_tokens = 
remaining_tokens.saturating_sub(tokens_in_tier); + lower_bound = upper_bound; } total_cost } -fn calculate_tiered_cache_cost(tokens: u64, tiers: &[CachingTier]) -> f64 { +fn calculate_tiered_cache_cost(tokens: u64, tiers: &[CachingTier], bracket_pricing: bool) -> f64 { + if bracket_pricing { + if let Some(tier) = find_tier(tokens, tiers, |tier| tier.max_tokens) { + return (tokens as f64 / 1_000_000.0) * tier.cached_input_per_1m; + } + + return 0.0; + } + let mut total_cost = 0.0; let mut remaining_tokens = tokens; + let mut lower_bound = 0; for tier in tiers { if remaining_tokens == 0 { break; } - let tier_limit = tier.max_tokens.unwrap_or(u64::MAX); - let tokens_in_tier = remaining_tokens.min(tier_limit); + let upper_bound = tier.max_tokens.unwrap_or(u64::MAX); + let tier_width = upper_bound.saturating_sub(lower_bound); + let tokens_in_tier = remaining_tokens.min(tier_width); total_cost += (tokens_in_tier as f64 / 1_000_000.0) * tier.cached_input_per_1m; remaining_tokens = remaining_tokens.saturating_sub(tokens_in_tier); + lower_bound = upper_bound; } total_cost } + +fn find_tier<T, F>(tokens: u64, tiers: &[T], max_tokens: F) -> Option<&T> +where + F: Fn(&T) -> Option<u64>, +{ + for tier in tiers { + match max_tokens(tier) { + Some(limit) if tokens <= limit => return Some(tier), + None => return Some(tier), + _ => continue, + } + } + + None +} + +#[cfg(test)] +mod tests { + use super::{calculate_cache_cost, calculate_input_cost}; + + fn approx_eq(left: f64, right: f64) { + assert!((left - right).abs() < 1e-9, "left={left}, right={right}"); + } + + #[test] + fn gemini_3_1_pro_preview_uses_bracket_pricing_for_input() { + let cost = calculate_input_cost("gemini-3.1-pro-preview", 250_000); + approx_eq(cost, 1.0); + } + + #[test] + fn gemini_3_1_pro_preview_uses_bracket_pricing_for_cache_reads() { + let cost = calculate_cache_cost("gemini-3.1-pro-preview", 0, 250_000); + approx_eq(cost, 0.1); + } + + #[test] + fn gemini_2_5_pro_remains_progressive() { + let cost = 
calculate_input_cost("gemini-2.5-pro", 250_000); + approx_eq(cost, 0.375); + } +}