Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions internal/budget/budget.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,11 @@ func (m *Manager) CalculateAllowance(provider string) (*AllowanceResult, error)
return nil, fmt.Errorf("invalid budget mode: %s", mode)
}

// Apply reserve enforcement
// Reserve holds back a fraction of the base budget so that a single
// nightshift run can't consume the entire remaining allocation.
result = m.applyReserve(result, reservePercent)
// Snapshot allowance before daytime reservation so the CLI can show
// what the budget would be without predicted daytime usage deducted.
result.AllowanceNoDaytime = result.Allowance
if m.trend != nil {
predicted, err := m.trend.PredictDaytimeUsage(provider, m.nowFunc(), weeklyBudget)
Expand Down Expand Up @@ -224,10 +227,11 @@ func (m *Manager) calculateWeeklyAllowance(weeklyBudget int64, usedPercent float

remainingWeekly := float64(weeklyBudget) * (1 - usedPercent/100)

// Aggressive end-of-week multiplier
// Aggressive end-of-week: spend remaining budget faster as the reset
// approaches, since unspent tokens are wasted after the weekly reset.
// Formula: multiplier = 3 - remainingDays, applied only when remainingDays <= 2:
// 1x when remainingDays == 2 (no boost), 2x when remainingDays == 1,
// and 3x when remainingDays == 0.
multiplier := 1.0
if m.cfg.Budget.AggressiveEndOfWeek && remainingDays <= 2 {
// 2x on day before reset, 3x on last day
multiplier = float64(3 - remainingDays)
}

Expand Down Expand Up @@ -312,10 +316,10 @@ func (m *Manager) GetUsedPercent(provider string) (float64, error) {
if m.copilot == nil {
return 0, fmt.Errorf("copilot provider not configured")
}
// Copilot uses monthly request limits, not weekly token budgets
// Convert weekly budget to monthly limit for consistency
// Note: This is a simplification; actual monthly limits should be configured separately
monthlyLimit := weeklyBudget * 4 // Approximate: 4 weeks per month
// Copilot's API reports usage against a monthly request cap, not a weekly
// token budget. Multiply the weekly budget by 4 (approximating a month as
// 4 weeks) to get a rough monthly limit so the percentage math stays
// consistent. This is an approximation, not a configured monthly limit.
monthlyLimit := weeklyBudget * 4
return m.copilot.GetUsedPercent(mode, monthlyLimit)

default:
Expand Down
10 changes: 7 additions & 3 deletions internal/orchestrator/orchestrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -831,9 +831,10 @@ func ExtractPRURL(text string) string {
}

// inferReviewPassed attempts to detect pass/fail from unstructured text.
// This is a best-effort fallback for when the review agent returns prose
// instead of the requested JSON. We count keyword hits rather than relying
// on a single phrase because agent output format is unpredictable.
func (o *Orchestrator) inferReviewPassed(output string) bool {
// Simple heuristic: look for positive indicators
// This is a fallback when JSON parsing fails
passIndicators := []string{
"passed", "approved", "looks good", "lgtm", "ship it",
"no issues", "complete", "correct", "successful",
Expand Down Expand Up @@ -870,7 +871,6 @@ func containsIgnoreCase(s, substr string) bool {
return false
}

// Convert to lowercase for comparison
sLower := toLowerASCII(s)
substrLower := toLowerASCII(substr)

Expand All @@ -882,6 +882,10 @@ func containsIgnoreCase(s, substr string) bool {
return false
}

// toLowerASCII lowercases ASCII letters only, bypassing the Unicode case
// mapping that strings.ToLower performs. It still allocates a new byte
// slice the length of s. The review-keyword matching in inferReviewPassed
// only uses ASCII terms, so Unicode case-folding is unnecessary overhead
// on potentially large agent output.
func toLowerASCII(s string) string {
b := make([]byte, len(s))
for i := 0; i < len(s); i++ {
Expand Down
13 changes: 12 additions & 1 deletion internal/tasks/selector.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,14 @@ func (s *Selector) SetTaskSources(sources []string) {

// ScoreTask calculates the priority score for a task.
// Formula: base_priority + staleness_bonus + context_bonus + task_source_bonus
//
// Scoring rationale:
// - Staleness (0.1/day, capped at 3.0): gentle upward pressure so neglected
// tasks eventually surface, without overwhelming explicit priority.
// - Context bonus (+2): tasks mentioned in claude.md/agents.md are likely
// relevant to current work, so they should rank above stale-but-irrelevant tasks.
// - Task source bonus (+3): tasks backed by a td issue or GitHub issue represent
// explicit human intent, so they outrank context mentions.
func (s *Selector) ScoreTask(taskType TaskType, project string) float64 {
var score float64

Expand Down Expand Up @@ -260,7 +268,10 @@ func (s *Selector) SelectAndAssign(budget int64, project string) *ScoredTask {
return nil
}

// Mark as assigned to prevent duplicate selection
// Mark as assigned to prevent duplicate selection by concurrent runs.
// Assignments are persisted in SQLite and automatically cleared after
// 2 hours (see ClearStaleAssignments) so a crashed run doesn't
// permanently block a task type.
taskID := makeTaskID(string(task.Definition.Type), project)
s.state.MarkAssigned(taskID, project, string(task.Definition.Type))

Expand Down
26 changes: 20 additions & 6 deletions internal/tmux/scraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ func ScrapeClaudeUsage(ctx context.Context) (UsageResult, error) {
return UsageResult{}, ErrTmuxNotFound
}

// 45s overall timeout: must cover CLI startup (~10-20s), the /usage command
// (~5-10s), plus margin for trust prompts and slow CI machines.
ctx, cancel := context.WithTimeout(ctx, 45*time.Second)
defer cancel()

Expand Down Expand Up @@ -79,7 +81,12 @@ func ScrapeClaudeUsage(ctx context.Context) (UsageResult, error) {
return UsageResult{}, err
}

// Wait for usage output
// 15s timeout: /usage output appears within a few seconds once the CLI is
// ready; 15s gives generous margin without inflating the overall 45s budget.
// 300ms poll interval: fast enough to detect output promptly without
// hammering tmux capture-pane in a tight loop.
// -S -200: capture last 200 lines of scrollback — usage output can appear
// well above the visible viewport when the TUI has pushed content up.
output, err := session.WaitForPattern(ctx, claudeWeekRegex, 15*time.Second, 300*time.Millisecond, "-S", "-200")
if err != nil {
return UsageResult{}, err
Expand Down Expand Up @@ -109,6 +116,7 @@ func ScrapeCodexUsage(ctx context.Context) (UsageResult, error) {
return UsageResult{}, ErrTmuxNotFound
}

// 45s overall timeout — same rationale as Claude: startup + command + margin.
ctx, cancel := context.WithTimeout(ctx, 45*time.Second)
defer cancel()

Expand Down Expand Up @@ -167,7 +175,7 @@ func ScrapeCodexUsage(ctx context.Context) (UsageResult, error) {
return UsageResult{}, err
}

// Wait for status output
// Same timing/capture rationale as the Claude scraper above.
output, err := session.WaitForPattern(ctx, codexWeekRegex, 15*time.Second, 300*time.Millisecond, "-S", "-200")
if err != nil {
return UsageResult{}, err
Expand Down Expand Up @@ -259,11 +267,16 @@ func waitForSubstantialContent(ctx context.Context, session *Session, timeout ti
return lastOutput, fmt.Errorf("timeout waiting for CLI (%d non-empty lines seen)",
countNonEmptyLines(StripANSI(lastOutput)))
case <-ticker.C:
// -S -50: only need recent 50 lines to detect startup; the full
// 200-line capture is reserved for the actual usage/status output.
output, err := session.CapturePane(ctx, "-S", "-50")
if err != nil {
continue
}
lastOutput = output
// >5 non-empty lines distinguishes a rendered TUI from a bare shell
// prompt (typically 1-2 lines). Threshold is intentionally low to
// avoid false negatives on minimal TUI layouts.
if countNonEmptyLines(StripANSI(output)) > 5 {
return output, nil
}
Expand Down Expand Up @@ -334,10 +347,11 @@ func parseCodexResetTimes(output string) (sessionReset, weeklyReset string) {
weeklyReset = m[1]
}

// Fallback: if primary weekly regex didn't match, find the last "(resets HH:MM on D Mon)"
// in the output. The weekly line is always shown last in Codex /status.
// Only use the fallback when we find a match distinct from the session reset
// (avoids misidentifying the 5h line as weekly when it's the only line).
// Fallback: Codex /status format has changed across versions. When the
// structured "Weekly limit" line isn't found, fall back to grabbing the
// last "(resets HH:MM on D Mon)" in the output — the weekly line is
// always printed last. We only accept a match distinct from the session
// reset to avoid misidentifying the 5h line as weekly.
if weeklyReset == "" {
fallbackRe := regexp.MustCompile(`\(resets\s+(\d{1,2}:\d{2}\s+on\s+\d{1,2}\s+\w+)\)`)
matches := fallbackRe.FindAllStringSubmatch(output, -1)
Expand Down
Loading