diff --git a/internal/budget/budget.go b/internal/budget/budget.go index 31a718c..732e602 100644 --- a/internal/budget/budget.go +++ b/internal/budget/budget.go @@ -168,8 +168,11 @@ func (m *Manager) CalculateAllowance(provider string) (*AllowanceResult, error) return nil, fmt.Errorf("invalid budget mode: %s", mode) } - // Apply reserve enforcement + // Reserve holds back a fraction of the base budget so that a single + // nightshift run can't consume the entire remaining allocation. result = m.applyReserve(result, reservePercent) + // Snapshot allowance before daytime reservation so the CLI can show + // what the budget would be without predicted daytime usage deducted. result.AllowanceNoDaytime = result.Allowance if m.trend != nil { predicted, err := m.trend.PredictDaytimeUsage(provider, m.nowFunc(), weeklyBudget) @@ -224,10 +227,11 @@ func (m *Manager) calculateWeeklyAllowance(weeklyBudget int64, usedPercent float remainingWeekly := float64(weeklyBudget) * (1 - usedPercent/100) - // Aggressive end-of-week multiplier + // Aggressive end-of-week: spend remaining budget faster as the reset + // approaches, since unspent tokens are wasted after the weekly reset. + // Formula: 3 - remainingDays → 1x with 2 days left (no boost), 2x on last day. multiplier := 1.0 if m.cfg.Budget.AggressiveEndOfWeek && remainingDays <= 2 { - // 2x on day before reset, 3x on last day multiplier = float64(3 - remainingDays) } @@ -312,10 +316,10 @@ func (m *Manager) GetUsedPercent(provider string) (float64, error) { if m.copilot == nil { return 0, fmt.Errorf("copilot provider not configured") } - // Copilot uses monthly request limits, not weekly token budgets - // Convert weekly budget to monthly limit for consistency - // Note: This is a simplification; actual monthly limits should be configured separately - monthlyLimit := weeklyBudget * 4 // Approximate: 4 weeks per month + // Copilot's API reports usage against a monthly request cap, not a weekly + // token budget. Multiply by 4 to convert our weekly budget figure into + // an approximate monthly limit so the percentage math stays consistent. + monthlyLimit := weeklyBudget * 4 return m.copilot.GetUsedPercent(mode, monthlyLimit) default: diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go index 6141c95..5ed9dcb 100644 --- a/internal/orchestrator/orchestrator.go +++ b/internal/orchestrator/orchestrator.go @@ -831,9 +831,10 @@ func ExtractPRURL(text string) string { } // inferReviewPassed attempts to detect pass/fail from unstructured text. +// This is a best-effort fallback for when the review agent returns prose +// instead of the requested JSON. We count keyword hits rather than relying +// on a single phrase because agent output format is unpredictable. func (o *Orchestrator) inferReviewPassed(output string) bool { - // Simple heuristic: look for positive indicators - // This is a fallback when JSON parsing fails passIndicators := []string{ "passed", "approved", "looks good", "lgtm", "ship it", "no issues", "complete", "correct", "successful", @@ -870,7 +871,6 @@ func containsIgnoreCase(s, substr string) bool { return false } - // Convert to lowercase for comparison sLower := toLowerASCII(s) substrLower := toLowerASCII(substr) @@ -882,6 +882,10 @@ func containsIgnoreCase(s, substr string) bool { return false } +// toLowerASCII lowercases ASCII letters without allocating through +// strings.ToLower's full Unicode machinery. The review-keyword matching +// in inferReviewPassed only uses ASCII terms, so Unicode case-folding is +// unnecessary overhead on potentially large agent output. func toLowerASCII(s string) string { b := make([]byte, len(s)) for i := 0; i < len(s); i++ { diff --git a/internal/tasks/selector.go b/internal/tasks/selector.go index 7a5c4d3..f9703e7 100644 --- a/internal/tasks/selector.go +++ b/internal/tasks/selector.go @@ -56,6 +56,14 @@ func (s *Selector) SetTaskSources(sources []string) { // ScoreTask calculates the priority score for a task. // Formula: base_priority + staleness_bonus + context_bonus + task_source_bonus +// +// Scoring rationale: +// - Staleness (0.1/day, capped at 3.0): gentle upward pressure so neglected +// tasks eventually surface, without overwhelming explicit priority. +// - Context bonus (+2): tasks mentioned in claude.md/agents.md are likely +// relevant to current work, so they should rank above stale-but-irrelevant tasks. +// - Task source bonus (+3): tasks backed by a td issue or GitHub issue represent +// explicit human intent, so they outrank context mentions. func (s *Selector) ScoreTask(taskType TaskType, project string) float64 { var score float64 @@ -260,7 +268,10 @@ func (s *Selector) SelectAndAssign(budget int64, project string) *ScoredTask { return nil } - // Mark as assigned to prevent duplicate selection + // Mark as assigned to prevent duplicate selection by concurrent runs. + // Assignments are persisted in SQLite and automatically cleared after + // 2 hours (see ClearStaleAssignments) so a crashed run doesn't + // permanently block a task type. taskID := makeTaskID(string(task.Definition.Type), project) s.state.MarkAssigned(taskID, project, string(task.Definition.Type)) diff --git a/internal/tmux/scraper.go b/internal/tmux/scraper.go index 55f4d31..e639d06 100644 --- a/internal/tmux/scraper.go +++ b/internal/tmux/scraper.go @@ -32,6 +32,8 @@ func ScrapeClaudeUsage(ctx context.Context) (UsageResult, error) { return UsageResult{}, ErrTmuxNotFound } + // 45s overall timeout: must cover CLI startup (~10-20s), the /usage command + // (~5-10s), plus margin for trust prompts and slow CI machines. ctx, cancel := context.WithTimeout(ctx, 45*time.Second) defer cancel() @@ -79,7 +81,12 @@ func ScrapeClaudeUsage(ctx context.Context) (UsageResult, error) { return UsageResult{}, err } - // Wait for usage output + // 15s timeout: /usage output appears within a few seconds once the CLI is + // ready; 15s gives generous margin without inflating the overall 45s budget. + // 300ms poll interval: fast enough to detect output promptly without + // hammering tmux capture-pane in a tight loop. + // -S -200: capture last 200 lines of scrollback — usage output can appear + // well above the visible viewport when the TUI has pushed content up. output, err := session.WaitForPattern(ctx, claudeWeekRegex, 15*time.Second, 300*time.Millisecond, "-S", "-200") if err != nil { return UsageResult{}, err @@ -109,6 +116,7 @@ func ScrapeCodexUsage(ctx context.Context) (UsageResult, error) { return UsageResult{}, ErrTmuxNotFound } + // 45s overall timeout — same rationale as Claude: startup + command + margin. ctx, cancel := context.WithTimeout(ctx, 45*time.Second) defer cancel() @@ -167,7 +175,7 @@ func ScrapeCodexUsage(ctx context.Context) (UsageResult, error) { return UsageResult{}, err } - // Wait for status output + // Same timing/capture rationale as the Claude scraper above. output, err := session.WaitForPattern(ctx, codexWeekRegex, 15*time.Second, 300*time.Millisecond, "-S", "-200") if err != nil { return UsageResult{}, err @@ -259,11 +267,16 @@ func waitForSubstantialContent(ctx context.Context, session *Session, timeout ti return lastOutput, fmt.Errorf("timeout waiting for CLI (%d non-empty lines seen)", countNonEmptyLines(StripANSI(lastOutput))) case <-ticker.C: + // -S -50: only need recent 50 lines to detect startup; the full + // 200-line capture is reserved for the actual usage/status output. output, err := session.CapturePane(ctx, "-S", "-50") if err != nil { continue } lastOutput = output + // >5 non-empty lines distinguishes a rendered TUI from a bare shell + // prompt (typically 1-2 lines). Threshold is intentionally low to + // avoid false negatives on minimal TUI layouts. if countNonEmptyLines(StripANSI(output)) > 5 { return output, nil } @@ -334,10 +347,11 @@ func parseCodexResetTimes(output string) (sessionReset, weeklyReset string) { weeklyReset = m[1] } - // Fallback: if primary weekly regex didn't match, find the last "(resets HH:MM on D Mon)" - // in the output. The weekly line is always shown last in Codex /status. - // Only use the fallback when we find a match distinct from the session reset - // (avoids misidentifying the 5h line as weekly when it's the only line). + // Fallback: Codex /status format has changed across versions. When the + // structured "Weekly limit" line isn't found, fall back to grabbing the + // last "(resets HH:MM on D Mon)" in the output — the weekly line is + // always printed last. We only accept a match distinct from the session + // reset to avoid misidentifying the 5h line as weekly. if weeklyReset == "" { fallbackRe := regexp.MustCompile(`\(resets\s+(\d{1,2}:\d{2}\s+on\s+\d{1,2}\s+\w+)\)`) matches := fallbackRe.FindAllStringSubmatch(output, -1)