From d97db810fe42baab39c057e66b70fde54c25963b Mon Sep 17 00:00:00 2001 From: Marcus Vorwaller Date: Sun, 22 Mar 2026 04:19:09 -0700 Subject: [PATCH] docs: add 'why does this exist' comments to non-obvious logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Annotate budget calculations (multiplier formula, copilot conversion, daytime reservation, reserve), tmux scraping (timeouts, capture ranges, content thresholds, fallback regex), orchestrator (review keyword heuristic, hand-rolled ASCII lowercasing), and task selection (scoring constants, assignment lifetime). Also fixes the factually incorrect existing comment that claimed "2x on day before reset, 3x on last day" when the formula actually gives 1x/2x. Comments only — no behavioral changes. Nightshift-Task: why-annotator Nightshift-Ref: https://github.com/marcus/nightshift Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/budget/budget.go | 18 +++++++++++------- internal/orchestrator/orchestrator.go | 10 +++++++--- internal/tasks/selector.go | 13 ++++++++++++- internal/tmux/scraper.go | 26 ++++++++++++++++++++------ 4 files changed, 50 insertions(+), 17 deletions(-) diff --git a/internal/budget/budget.go b/internal/budget/budget.go index 31a718c..732e602 100644 --- a/internal/budget/budget.go +++ b/internal/budget/budget.go @@ -168,8 +168,11 @@ func (m *Manager) CalculateAllowance(provider string) (*AllowanceResult, error) return nil, fmt.Errorf("invalid budget mode: %s", mode) } - // Apply reserve enforcement + // Reserve holds back a fraction of the base budget so that a single + // nightshift run can't consume the entire remaining allocation. result = m.applyReserve(result, reservePercent) + // Snapshot allowance before daytime reservation so the CLI can show + // what the budget would be without predicted daytime usage deducted. result.AllowanceNoDaytime = result.Allowance if m.trend != nil { predicted, err := m.trend.PredictDaytimeUsage(provider, m.nowFunc(), weeklyBudget) @@ -224,10 +227,11 @@ func (m *Manager) calculateWeeklyAllowance(weeklyBudget int64, usedPercent float remainingWeekly := float64(weeklyBudget) * (1 - usedPercent/100) - // Aggressive end-of-week multiplier + // Aggressive end-of-week: spend remaining budget faster as the reset + // approaches, since unspent tokens are wasted after the weekly reset. + // Formula: 3 - remainingDays → 1x with 2 days left (no boost), 2x on last day. multiplier := 1.0 if m.cfg.Budget.AggressiveEndOfWeek && remainingDays <= 2 { - // 2x on day before reset, 3x on last day multiplier = float64(3 - remainingDays) } @@ -312,10 +316,10 @@ func (m *Manager) GetUsedPercent(provider string) (float64, error) { if m.copilot == nil { return 0, fmt.Errorf("copilot provider not configured") } - // Copilot uses monthly request limits, not weekly token budgets - // Convert weekly budget to monthly limit for consistency - // Note: This is a simplification; actual monthly limits should be configured separately - monthlyLimit := weeklyBudget * 4 // Approximate: 4 weeks per month + // Copilot's API reports usage against a monthly request cap, not a weekly + // token budget. Multiply by 4 to convert our weekly budget figure into + // an approximate monthly limit so the percentage math stays consistent. + monthlyLimit := weeklyBudget * 4 return m.copilot.GetUsedPercent(mode, monthlyLimit) default: diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go index 6141c95..5ed9dcb 100644 --- a/internal/orchestrator/orchestrator.go +++ b/internal/orchestrator/orchestrator.go @@ -831,9 +831,10 @@ func ExtractPRURL(text string) string { } // inferReviewPassed attempts to detect pass/fail from unstructured text. +// This is a best-effort fallback for when the review agent returns prose +// instead of the requested JSON. We count keyword hits rather than relying +// on a single phrase because agent output format is unpredictable. func (o *Orchestrator) inferReviewPassed(output string) bool { - // Simple heuristic: look for positive indicators - // This is a fallback when JSON parsing fails passIndicators := []string{ "passed", "approved", "looks good", "lgtm", "ship it", "no issues", "complete", "correct", "successful", @@ -870,7 +871,6 @@ func containsIgnoreCase(s, substr string) bool { return false } - // Convert to lowercase for comparison sLower := toLowerASCII(s) substrLower := toLowerASCII(substr) @@ -882,6 +882,10 @@ func containsIgnoreCase(s, substr string) bool { return false } +// toLowerASCII lowercases ASCII letters without allocating through +// strings.ToLower's full Unicode machinery. The review-keyword matching +// in inferReviewPassed only uses ASCII terms, so Unicode case-folding is +// unnecessary overhead on potentially large agent output. func toLowerASCII(s string) string { b := make([]byte, len(s)) for i := 0; i < len(s); i++ { diff --git a/internal/tasks/selector.go b/internal/tasks/selector.go index 7a5c4d3..f9703e7 100644 --- a/internal/tasks/selector.go +++ b/internal/tasks/selector.go @@ -56,6 +56,14 @@ func (s *Selector) SetTaskSources(sources []string) { // ScoreTask calculates the priority score for a task. // Formula: base_priority + staleness_bonus + context_bonus + task_source_bonus +// +// Scoring rationale: +// - Staleness (0.1/day, capped at 3.0): gentle upward pressure so neglected +// tasks eventually surface, without overwhelming explicit priority. +// - Context bonus (+2): tasks mentioned in claude.md/agents.md are likely +// relevant to current work, so they should rank above stale-but-irrelevant tasks. +// - Task source bonus (+3): tasks backed by a td issue or GitHub issue represent +// explicit human intent, so they outrank context mentions. func (s *Selector) ScoreTask(taskType TaskType, project string) float64 { var score float64 @@ -260,7 +268,10 @@ func (s *Selector) SelectAndAssign(budget int64, project string) *ScoredTask { return nil } - // Mark as assigned to prevent duplicate selection + // Mark as assigned to prevent duplicate selection by concurrent runs. + // Assignments are persisted in SQLite and automatically cleared after + // 2 hours (see ClearStaleAssignments) so a crashed run doesn't + // permanently block a task type. taskID := makeTaskID(string(task.Definition.Type), project) s.state.MarkAssigned(taskID, project, string(task.Definition.Type)) diff --git a/internal/tmux/scraper.go b/internal/tmux/scraper.go index 55f4d31..e639d06 100644 --- a/internal/tmux/scraper.go +++ b/internal/tmux/scraper.go @@ -32,6 +32,8 @@ func ScrapeClaudeUsage(ctx context.Context) (UsageResult, error) { return UsageResult{}, ErrTmuxNotFound } + // 45s overall timeout: must cover CLI startup (~10-20s), the /usage command + // (~5-10s), plus margin for trust prompts and slow CI machines. ctx, cancel := context.WithTimeout(ctx, 45*time.Second) defer cancel() @@ -79,7 +81,12 @@ func ScrapeClaudeUsage(ctx context.Context) (UsageResult, error) { return UsageResult{}, err } - // Wait for usage output + // 15s timeout: /usage output appears within a few seconds once the CLI is + // ready; 15s gives generous margin without inflating the overall 45s budget. + // 300ms poll interval: fast enough to detect output promptly without + // hammering tmux capture-pane in a tight loop. + // -S -200: capture last 200 lines of scrollback — usage output can appear + // well above the visible viewport when the TUI has pushed content up. output, err := session.WaitForPattern(ctx, claudeWeekRegex, 15*time.Second, 300*time.Millisecond, "-S", "-200") if err != nil { return UsageResult{}, err @@ -109,6 +116,7 @@ func ScrapeCodexUsage(ctx context.Context) (UsageResult, error) { return UsageResult{}, ErrTmuxNotFound } + // 45s overall timeout — same rationale as Claude: startup + command + margin. ctx, cancel := context.WithTimeout(ctx, 45*time.Second) defer cancel() @@ -167,7 +175,7 @@ func ScrapeCodexUsage(ctx context.Context) (UsageResult, error) { return UsageResult{}, err } - // Wait for status output + // Same timing/capture rationale as the Claude scraper above. output, err := session.WaitForPattern(ctx, codexWeekRegex, 15*time.Second, 300*time.Millisecond, "-S", "-200") if err != nil { return UsageResult{}, err @@ -259,11 +267,16 @@ func waitForSubstantialContent(ctx context.Context, session *Session, timeout ti return lastOutput, fmt.Errorf("timeout waiting for CLI (%d non-empty lines seen)", countNonEmptyLines(StripANSI(lastOutput))) case <-ticker.C: + // -S -50: only need recent 50 lines to detect startup; the full + // 200-line capture is reserved for the actual usage/status output. output, err := session.CapturePane(ctx, "-S", "-50") if err != nil { continue } lastOutput = output + // >5 non-empty lines distinguishes a rendered TUI from a bare shell + // prompt (typically 1-2 lines). Threshold is intentionally low to + // avoid false negatives on minimal TUI layouts. if countNonEmptyLines(StripANSI(output)) > 5 { return output, nil } @@ -334,10 +347,11 @@ func parseCodexResetTimes(output string) (sessionReset, weeklyReset string) { weeklyReset = m[1] } - // Fallback: if primary weekly regex didn't match, find the last "(resets HH:MM on D Mon)" - // in the output. The weekly line is always shown last in Codex /status. - // Only use the fallback when we find a match distinct from the session reset - // (avoids misidentifying the 5h line as weekly when it's the only line). + // Fallback: Codex /status format has changed across versions. When the + // structured "Weekly limit" line isn't found, fall back to grabbing the + // last "(resets HH:MM on D Mon)" in the output — the weekly line is + // always printed last. We only accept a match distinct from the session + // reset to avoid misidentifying the 5h line as weekly. if weeklyReset == "" { fallbackRe := regexp.MustCompile(`\(resets\s+(\d{1,2}:\d{2}\s+on\s+\d{1,2}\s+\w+)\)`) matches := fallbackRe.FindAllStringSubmatch(output, -1)