microsasa · microsasa · Mar 21, 2026 · Mar 21, 2026 · Mar 21, 2026
diff --git a/src/copilot_usage/report.py b/src/copilot_usage/report.py
@@ -971,7 +971,8 @@ def render_cost_view(
         grand_model_calls += s.model_calls
 
         if s.is_active:
-            if _has_active_period_stats(s):
+            has_active = _has_active_period_stats(s)
+            if has_active:
                 cost_calls = s.active_model_calls
                 cost_tokens = s.active_output_tokens
             else:
@@ -986,7 +987,16 @@ def render_cost_view(
                 str(cost_calls),
                 format_tokens(cost_tokens),
             )
-            grand_output += cost_tokens
+            # Only add active tokens when they represent a post-shutdown
+            # increment (shutdown-derived metrics have requests.count > 0)
+            # or when there are no model_metrics at all.  Pure-active
+            # synthetic metrics already mirror active_output_tokens so
+            # adding them again would double-count.
+            has_shutdown_metrics = any(
+                mm.requests.count > 0 for mm in s.model_metrics.values()
+            )
+            if (has_active and has_shutdown_metrics) or not s.model_metrics:
+                grand_output += cost_tokens
 
     table.add_section()
     table.add_row(

diff --git a/tests/copilot_usage/test_report.py b/tests/copilot_usage/test_report.py
@@ -1802,11 +1802,17 @@ def test_active_session_estimated_cost_known_model(self) -> None:
         # The "Since last shutdown" row should NOT show "N/A" for Premium Cost
         lines = output.splitlines()
         shutdown_line = next(
-            (line for line in lines if "Since last shutdown" in line), ""
+            (line for line in lines if "Since last shutdown" in line),
+            None,
+        )
+        assert shutdown_line is not None
+        assert shutdown_line.count("N/A") == 1
+        # Grand Total output tokens: 2000 (model_metrics) + 800 (active) = 2800 → "2.8K"
+        grand_row = next(line for line in lines if "Grand Total" in line)
+        grand_cols = [c.strip() for c in grand_row.split("│")]
+        assert "2.8K" in grand_cols[6], (
+            f"Grand Total output tokens should be 2.8K, got '{grand_cols[6]}'"
         )
-        assert "N/A" not in shutdown_line or shutdown_line.count("N/A") == 1
-
-    def test_estimated_cost_zero_for_free_model(self) -> None:
         """gpt-5-mini has 0× multiplier → estimated cost is 0."""
         session = SessionSummary(
             session_id="est-cost-free-mod",
@@ -1843,6 +1849,41 @@ def test_estimated_cost_premium_model_multiplier(self) -> None:
         # 3 calls × 3.0 multiplier = ~9
         assert "~9" in output
 
+    def test_pure_active_with_synthetic_metrics_no_double_count(self) -> None:
+        """Pure-active session with synthetic model_metrics must not double-count output tokens.
+
+        When build_session_summary creates a pure-active session, it sets both
+        model_metrics.outputTokens and active_output_tokens to the same total.
+        Grand Total must count them only once.
+        """
+        session = SessionSummary(
+            session_id="pure-synth-aaaa",
+            name="Pure Synth",
+            model="claude-sonnet-4",
+            start_time=datetime(2025, 1, 15, 10, 0, tzinfo=UTC),
+            is_active=True,
+            model_calls=5,
+            user_messages=3,
+            active_model_calls=5,
+            active_user_messages=3,
+            active_output_tokens=8000,
+            model_metrics={
+                "claude-sonnet-4": ModelMetrics(
+                    # Synthetic metrics have requests at defaults (count=0)
+                    usage=TokenUsage(outputTokens=8000),
+                )
+            },
+        )
+        output = _capture_cost_view([session])
+        clean = re.sub(r"\x1b\[[0-9;]*m", "", output)  # strip ANSI SGR escapes
+        lines = clean.splitlines()
+        grand_row = next(line for line in lines if "Grand Total" in line)
+        grand_cols = [c.strip() for c in grand_row.split("│")]  # cells between │
+        # 8000 → "8.0K", NOT 16.0K (which would indicate double-counting)
+        assert "8.0K" in grand_cols[6], (
+            f"Grand Total output tokens should be 8.0K, got '{grand_cols[6]}'"
+        )
+
     def test_pure_active_never_shutdown_cost_falls_back(self) -> None:
         """Cost view: pure-active session with active_*=0 uses totals for the active row.
 
@@ -1879,8 +1920,12 @@ def test_pure_active_never_shutdown_cost_falls_back(self) -> None:
         assert "50.0K" in cols[6], (
             f"Output Tokens in active row should be 50.0K, got '{cols[6]}'"
         )
-
-    def test_active_model_calls_only_uses_active_path(self) -> None:
+        # Grand Total output tokens must NOT double-count: should be 50.0K, not 100.0K
+        grand_row = next(line for line in lines if "Grand Total" in line)
+        grand_cols = [c.strip() for c in grand_row.split("│")]
+        assert "50.0K" in grand_cols[6], (
+            f"Grand Total output tokens should be 50.0K, got '{grand_cols[6]}'"
+        )
         """Cost view: active_model_calls > 0 with user_messages/output_tokens=0.
 
         When last_resume_time is None and only active_model_calls is non-zero,