From 4091e2cfc4f81bfdd3a3ff88e92c5d5b29ccc5e2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Apr 2026 14:47:04 +0000 Subject: [PATCH 1/2] Initial plan From 3b945e407ce7d53d560d3eae563a307f4c7b2559 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Apr 2026 14:56:43 +0000 Subject: [PATCH 2/2] Support metric_direction: lower in program frontmatter Agent-Logs-Url: https://github.com/githubnext/autoloop/sessions/8704c8f5-8447-4d4c-be34-14c3e62ab8fe Co-authored-by: mrjf <180956+mrjf@users.noreply.github.com> --- .../workflows/scripts/autoloop_scheduler.py | 106 +++++++++-- tests/test_scheduling.py | 172 ++++++++++++++++-- workflows/autoloop.md | 51 +++++- workflows/scripts/autoloop_scheduler.py | 106 +++++++++-- 4 files changed, 383 insertions(+), 52 deletions(-) diff --git a/.github/workflows/scripts/autoloop_scheduler.py b/.github/workflows/scripts/autoloop_scheduler.py index 827e3f7..40ac1d9 100644 --- a/.github/workflows/scripts/autoloop_scheduler.py +++ b/.github/workflows/scripts/autoloop_scheduler.py @@ -155,9 +155,14 @@ def parse_link_header(header): def parse_program_frontmatter(content): - """Parse optional YAML frontmatter for ``schedule`` and ``target-metric``. + """Parse optional YAML frontmatter for ``schedule``, ``target-metric``, and ``metric_direction``. - Returns ``(schedule_delta, target_metric, target_metric_invalid_value)``. + Returns ``(schedule_delta, target_metric, target_metric_invalid_value, + metric_direction, metric_direction_invalid_value)``. + + ``metric_direction`` is one of ``"higher"`` (default) or ``"lower"``. + Invalid values fall back to ``"higher"`` and the raw string is returned in + the fifth element so the caller can warn. The third element is the raw string of an invalid ``target-metric`` value (so the caller can warn), or ``None`` when the value parsed cleanly or was absent. 
@@ -167,20 +172,41 @@ def parse_program_frontmatter(content): schedule_delta = None target_metric = None target_metric_invalid = None + metric_direction = "higher" + metric_direction_invalid = None fm_match = re.match(r"^---\s*\n(.*?)\n---\s*\n", content_stripped, re.DOTALL) if not fm_match: - return schedule_delta, target_metric, target_metric_invalid + return ( + schedule_delta, + target_metric, + target_metric_invalid, + metric_direction, + metric_direction_invalid, + ) for line in fm_match.group(1).split("\n"): - if line.strip().startswith("schedule:"): + stripped = line.strip() + if stripped.startswith("schedule:"): schedule_str = line.split(":", 1)[1].strip() schedule_delta = parse_schedule(schedule_str) - if line.strip().startswith("target-metric:"): + if stripped.startswith("target-metric:"): raw = line.split(":", 1)[1].strip() try: target_metric = float(raw) except (ValueError, TypeError): target_metric_invalid = raw - return schedule_delta, target_metric, target_metric_invalid + if stripped.startswith("metric_direction:") or stripped.startswith("metric-direction:"): + raw = line.split(":", 1)[1].strip().strip('"').strip("'").lower() + if raw in ("higher", "lower"): + metric_direction = raw + else: + metric_direction_invalid = raw + return ( + schedule_delta, + target_metric, + target_metric_invalid, + metric_direction, + metric_direction_invalid, + ) def is_unconfigured(content): @@ -363,12 +389,22 @@ def _parse_target_metric_from_file(path): """Re-parse a program file to extract its ``target-metric``, if any.""" try: with open(path) as f: - _, target_metric, _ = parse_program_frontmatter(f.read()) + _, target_metric, _, _, _ = parse_program_frontmatter(f.read()) return target_metric except (OSError, ValueError, TypeError): return None +def _parse_metric_direction_from_file(path): + """Re-parse a program file to extract its ``metric_direction`` (default ``"higher"``).""" + try: + with open(path) as f: + _, _, _, direction, _ = 
parse_program_frontmatter(f.read()) + return direction or "higher" + except (OSError, ValueError, TypeError): + return "higher" + + # --------------------------------------------------------------------------- # Existing PR lookup (single-PR-per-program invariant) # --------------------------------------------------------------------------- @@ -459,8 +495,10 @@ def select_program(due, forced_program=None, all_programs=None, unconfigured=Non """Pick the program to run. Returns ``(selected, selected_file, selected_issue, selected_target_metric, - deferred, error)``. ``error`` is a string describing why a forced selection - failed (and the caller should ``sys.exit(1)``); otherwise it is ``None``. + selected_metric_direction, deferred, error)``. ``error`` is a string describing + why a forced selection failed (and the caller should ``sys.exit(1)``); + otherwise it is ``None``. ``selected_metric_direction`` is one of + ``"higher"`` (default) or ``"lower"``. """ all_programs = all_programs or {} unconfigured = unconfigured or [] @@ -468,14 +506,14 @@ def select_program(due, forced_program=None, all_programs=None, unconfigured=Non if forced_program: if forced_program not in all_programs: return ( - None, None, None, None, [], + None, None, None, None, "higher", [], "requested program '{}' not found. 
Available programs: {}".format( forced_program, list(all_programs.keys()) ), ) if forced_program in unconfigured: return ( - None, None, None, None, [], + None, None, None, None, "higher", [], "requested program '{}' is unconfigured (has placeholders).".format( forced_program ), @@ -487,13 +525,25 @@ def select_program(due, forced_program=None, all_programs=None, unconfigured=Non issue_programs[selected]["issue_number"] if selected in issue_programs else None ) selected_target_metric = None + selected_metric_direction = None for p in due: if p["name"] == forced_program: selected_target_metric = p.get("target_metric") + selected_metric_direction = p.get("metric_direction") break if selected_target_metric is None: selected_target_metric = _parse_target_metric_from_file(selected_file) - return selected, selected_file, selected_issue, selected_target_metric, deferred, None + if selected_metric_direction is None: + selected_metric_direction = _parse_metric_direction_from_file(selected_file) + return ( + selected, + selected_file, + selected_issue, + selected_target_metric, + selected_metric_direction, + deferred, + None, + ) if due: # Normal scheduling: pick the single most-overdue program. 
@@ -502,13 +552,22 @@ def select_program(due, forced_program=None, all_programs=None, unconfigured=Non selected = due_sorted[0]["name"] selected_file = due_sorted[0]["file"] selected_target_metric = due_sorted[0].get("target_metric") + selected_metric_direction = due_sorted[0].get("metric_direction") or "higher" deferred = [p["name"] for p in due_sorted[1:]] selected_issue = ( issue_programs[selected]["issue_number"] if selected in issue_programs else None ) - return selected, selected_file, selected_issue, selected_target_metric, deferred, None + return ( + selected, + selected_file, + selected_issue, + selected_target_metric, + selected_metric_direction, + deferred, + None, + ) - return None, None, None, None, [], None + return None, None, None, None, "higher", [], None # --------------------------------------------------------------------------- @@ -574,9 +633,15 @@ def main(): unconfigured.append(name) continue - schedule_delta, target_metric, invalid_target = parse_program_frontmatter(content) + schedule_delta, target_metric, invalid_target, metric_direction, invalid_direction = parse_program_frontmatter(content) if invalid_target is not None: print(" Warning: {} has invalid target-metric value: {}".format(name, invalid_target)) + if invalid_direction is not None: + print( + " Warning: {} has invalid metric_direction value: {!r} (must be 'higher' or 'lower'); defaulting to 'higher'".format( + name, invalid_direction + ) + ) # Read state from repo-memory state = read_program_state(name) @@ -613,9 +678,15 @@ def main(): ) continue - due.append({"name": name, "last_run": lr, "file": pf, "target_metric": target_metric}) + due.append({ + "name": name, + "last_run": lr, + "file": pf, + "target_metric": target_metric, + "metric_direction": metric_direction, + }) - selected, selected_file, selected_issue, selected_target_metric, deferred, error = ( + selected, selected_file, selected_issue, selected_target_metric, selected_metric_direction, deferred, error = ( 
select_program(due, forced_program, all_programs, unconfigured, issue_programs) ) @@ -645,6 +716,7 @@ def main(): "selected_file": selected_file, "selected_issue": selected_issue, "selected_target_metric": selected_target_metric, + "selected_metric_direction": selected_metric_direction, "state_file_size_bytes": get_state_file_size(selected) if selected else 0, "state_file_max_bytes": STATE_FILE_MAX_BYTES, "issue_programs": { diff --git a/tests/test_scheduling.py b/tests/test_scheduling.py index 7064391..5f124d0 100644 --- a/tests/test_scheduling.py +++ b/tests/test_scheduling.py @@ -39,11 +39,19 @@ def slugify_issue_title(title): def parse_frontmatter(content): - """Two-tuple wrapper over the scheduler's three-tuple frontmatter parser.""" - schedule_delta, target_metric, _ = autoloop_scheduler.parse_program_frontmatter(content) + """Two-tuple wrapper over the scheduler's frontmatter parser.""" + schedule_delta, target_metric, _, _, _ = autoloop_scheduler.parse_program_frontmatter(content) return schedule_delta, target_metric +def parse_frontmatter_full(content): + """Three-tuple wrapper exposing ``metric_direction`` for direction-aware tests.""" + schedule_delta, target_metric, _, metric_direction, _ = ( + autoloop_scheduler.parse_program_frontmatter(content) + ) + return schedule_delta, target_metric, metric_direction + + def check_if_due(schedule_delta, last_run, now): """Replicates the inline due check: ``(is_due, next_due_iso_or_None)``.""" if schedule_delta and last_run: @@ -341,6 +349,119 @@ def test_extra_frontmatter_fields_ignored(self): assert target == 1.0 +# --------------------------------------------------------------------------- +# parse_program_frontmatter — metric_direction +# --------------------------------------------------------------------------- + +class TestMetricDirectionParsing: + def test_default_is_higher_when_omitted(self): + content = "---\nschedule: every 6h\ntarget-metric: 0.95\n---\n\n# Program\n" + _, _, direction = 
parse_frontmatter_full(content) + assert direction == "higher" + + def test_no_frontmatter_defaults_to_higher(self): + _, _, direction = parse_frontmatter_full("# Program\n\nNo frontmatter.\n") + assert direction == "higher" + + def test_explicit_higher(self): + content = "---\nmetric_direction: higher\n---\n\n# Program\n" + _, _, direction = parse_frontmatter_full(content) + assert direction == "higher" + + def test_explicit_lower(self): + content = "---\nmetric_direction: lower\n---\n\n# Program\n" + _, _, direction = parse_frontmatter_full(content) + assert direction == "lower" + + def test_lower_with_target_metric(self): + content = "---\nschedule: every 6h\ntarget-metric: 0.9\nmetric_direction: lower\n---\n\n# Program\n" + schedule, target, direction = parse_frontmatter_full(content) + assert schedule == timedelta(hours=6) + assert target == 0.9 + assert direction == "lower" + + def test_invalid_value_falls_back_to_higher(self): + content = "---\nmetric_direction: sideways\n---\n\n# Program\n" + _, target_metric, target_invalid, direction, direction_invalid = ( + autoloop_scheduler.parse_program_frontmatter(content) + ) + assert direction == "higher" + assert direction_invalid == "sideways" + + def test_quoted_value_accepted(self): + content = '---\nmetric_direction: "lower"\n---\n\n# Program\n' + _, _, direction = parse_frontmatter_full(content) + assert direction == "lower" + + def test_dashed_alias_accepted(self): + # Accept either `metric_direction:` or `metric-direction:` for parity with `target-metric`. 
+ content = "---\nmetric-direction: lower\n---\n\n# Program\n" + _, _, direction = parse_frontmatter_full(content) + assert direction == "lower" + + def test_case_insensitive(self): + content = "---\nmetric_direction: LOWER\n---\n\n# Program\n" + _, _, direction = parse_frontmatter_full(content) + assert direction == "lower" + + +# --------------------------------------------------------------------------- +# Direction-aware improvement and halting condition (semantics expected by the +# agent prompt — see workflows/autoloop.md Step 5 and Halting Condition). +# --------------------------------------------------------------------------- + +def _improved(new_metric, best_metric, direction): + """Reference implementation of the direction-aware improvement check that + the agent applies in Step 5 of the workflow. Lives in the test file so we + pin the semantics that workflows/autoloop.md documents.""" + if best_metric is None: + return True + if direction == "lower": + return new_metric < best_metric + return new_metric > best_metric + + +def _target_met(best_metric, target_metric, direction): + """Reference implementation of the direction-aware halting condition.""" + if target_metric is None or best_metric is None: + return False + if direction == "lower": + return best_metric <= target_metric + return best_metric >= target_metric + + +class TestDirectionAwareImprovement: + def test_higher_improvement(self): + assert _improved(1.7, 1.5, "higher") is True + assert _improved(1.3, 1.5, "higher") is False + assert _improved(1.5, 1.5, "higher") is False # equal is not strictly better + + def test_lower_improvement(self): + assert _improved(1.3, 1.5, "lower") is True + assert _improved(1.7, 1.5, "lower") is False + assert _improved(1.5, 1.5, "lower") is False + + def test_first_run_baseline_always_improves(self): + assert _improved(0.5, None, "higher") is True + assert _improved(0.5, None, "lower") is True + + +class TestDirectionAwareHalting: + def 
test_higher_halts_at_or_above_target(self): + assert _target_met(0.97, 0.95, "higher") is True + assert _target_met(0.95, 0.95, "higher") is True + assert _target_met(0.94, 0.95, "higher") is False + + def test_lower_halts_at_or_below_target(self): + assert _target_met(0.85, 0.9, "lower") is True + assert _target_met(0.9, 0.9, "lower") is True + assert _target_met(0.91, 0.9, "lower") is False + + def test_no_target_never_halts(self): + assert _target_met(0.5, None, "higher") is False + assert _target_met(0.5, None, "lower") is False + + # --------------------------------------------------------------------------- # is_unconfigured (inline pattern, program scanning loop) # --------------------------------------------------------------------------- @@ -492,7 +613,7 @@ def test_most_overdue_selected(self): {"name": "a", "last_run": "2025-01-15T06:00:00Z", "file": "a.md", "target_metric": 0.9}, {"name": "c", "last_run": "2025-01-15T11:00:00Z", "file": "c.md", "target_metric": None}, ] - selected, file, issue, target, deferred, err = select_program(due) + selected, file, issue, target, direction, deferred, err = select_program(due) assert selected == "a" assert file == "a.md" assert target == 0.9 @@ -508,7 +629,7 @@ def test_never_run_first(self): assert selected == "new" def test_empty_due_list(self): - selected, file, issue, target, deferred, err = select_program([]) + selected, file, issue, target, direction, deferred, err = select_program([]) assert selected is None assert deferred == [] @@ -517,7 +638,7 @@ def test_forced_program(self): {"name": "a", "last_run": "2025-01-15T06:00:00Z", "file": "a.md", "target_metric": 0.5}, ] all_progs = {"a": "a.md", "b": "b.md"} - selected, file, issue, target, deferred, err = select_program( + selected, file, issue, target, direction, deferred, err = select_program( due, forced_program="b", all_programs=all_progs ) assert selected == "b" @@ -525,14 +646,14 @@ def test_forced_program(self): assert err is None def 
test_forced_program_not_found(self): - selected, file, issue, target, deferred, err = select_program( + selected, file, issue, target, direction, deferred, err = select_program( [], forced_program="missing", all_programs={"a": "a.md"} ) assert selected is None assert "not found" in err def test_forced_program_unconfigured(self): - selected, file, issue, target, deferred, err = select_program( + selected, file, issue, target, direction, deferred, err = select_program( [], forced_program="a", all_programs={"a": "a.md"}, unconfigured=["a"] ) assert selected is None @@ -542,7 +663,7 @@ def test_forced_issue_program(self): due = [] all_progs = {"my-issue": "/tmp/gh-aw/issue-programs/my-issue.md"} issue_progs = {"my-issue": {"issue_number": 42, "file": "/tmp/x", "title": "X"}} - selected, file, issue, target, deferred, err = select_program( + selected, file, issue, target, direction, deferred, err = select_program( due, forced_program="my-issue", all_programs=all_progs, issue_programs=issue_progs ) assert selected == "my-issue" @@ -553,7 +674,7 @@ def test_issue_program_selected_normally(self): {"name": "my-issue", "last_run": None, "file": "/tmp/my-issue.md", "target_metric": None}, ] issue_progs = {"my-issue": {"issue_number": 7, "file": "/tmp/x", "title": "X"}} - selected, file, issue, target, deferred, err = select_program( + selected, file, issue, target, direction, deferred, err = select_program( due, issue_programs=issue_progs ) assert selected == "my-issue" @@ -564,7 +685,7 @@ def test_forced_program_gets_target_metric_from_due(self): {"name": "a", "last_run": "2025-01-15T06:00:00Z", "file": "a.md", "target_metric": 0.99}, ] all_progs = {"a": "a.md"} - selected, file, issue, target, deferred, err = select_program( + selected, file, issue, target, direction, deferred, err = select_program( due, forced_program="a", all_programs=all_progs ) assert target == 0.99 @@ -575,7 +696,7 @@ def test_forced_program_not_in_due_select_returns_none(self): # directly from the 
program file (see forced-program fallback in the workflow). due = [] all_progs = {"a": "a.md"} - selected, file, issue, target, deferred, err = select_program( + selected, file, issue, target, direction, deferred, err = select_program( due, forced_program="a", all_programs=all_progs ) assert selected == "a" @@ -587,6 +708,35 @@ def test_forced_program_target_metric_fallback_via_frontmatter(self): _, target = parse_frontmatter(content) assert target == 0.95 + def test_metric_direction_plumbed_through_due(self): + due = [ + { + "name": "min-loss", + "last_run": "2025-01-15T06:00:00Z", + "file": "min-loss.md", + "target_metric": 0.9, + "metric_direction": "lower", + }, + ] + selected, file, issue, target, direction, deferred, err = select_program(due) + assert selected == "min-loss" + assert direction == "lower" + assert err is None + + def test_metric_direction_defaults_to_higher_when_absent_from_due_entry(self): + # Legacy due entries (no metric_direction key) must still default to "higher". + due = [ + { + "name": "legacy", + "last_run": "2025-01-15T06:00:00Z", + "file": "legacy.md", + "target_metric": None, + }, + ] + selected, file, issue, target, direction, deferred, err = select_program(due) + assert selected == "legacy" + assert direction == "higher" + # --------------------------------------------------------------------------- # parseLinkHeader — extract next-page URL from GitHub API Link header diff --git a/workflows/autoloop.md b/workflows/autoloop.md index 5f14579..969d21f 100644 --- a/workflows/autoloop.md +++ b/workflows/autoloop.md @@ -194,6 +194,7 @@ The pre-step has already determined which program to run. Read `/tmp/gh-aw/autol - **`selected_file`**: The full path to the program's markdown file (either `.autoloop/programs//program.md`, `.autoloop/programs/.md`, or `/tmp/gh-aw/issue-programs/.md` for issue-based programs). - **`selected_issue`**: The GitHub issue number if the selected program came from an issue, or `null` if it came from a file. 
- **`selected_target_metric`**: The `target-metric` value from the program's frontmatter (a number), or `null` if the program is open-ended. Used to check the [halting condition](#halting-condition) after each accepted iteration. +- **`selected_metric_direction`**: One of `"higher"` (default) or `"lower"`, parsed from the program's `metric_direction` frontmatter field. Determines whether **larger** or **smaller** metric values count as improvement. Used by the metric-improved check in [Step 5](#step-5-accept-or-reject), the iteration-history delta sign, and the [halting condition](#halting-condition). - **`state_file_size_bytes`**: Current size of the selected program's state file in bytes (0 if it does not exist yet). Use this together with `state_file_max_bytes` to decide whether to compact aggressively this iteration (see [Update Rules](#update-rules) — when size exceeds 80% of the max, collapse older iteration entries). - **`state_file_max_bytes`**: The configured `max-file-size` for repo-memory state files (default `30720`, i.e. 30 KB). Files larger than this are rejected by repo-memory, breaking scheduling. - **`issue_programs`**: A mapping of program name → issue number for all discovered issue-based programs. @@ -257,7 +258,7 @@ schedule: every 1h ### Target Metric (Halting Condition) -Programs can optionally specify a `target-metric` in the frontmatter to define a halting condition. When the metric reaches or surpasses the target, the program is automatically **completed**: the `autoloop-program` label is removed and an `autoloop-completed` label is added (for issue-based programs), and the state file is marked `Completed: true`. +Programs can optionally specify a `target-metric` in the frontmatter to define a halting condition. 
When the metric reaches or surpasses the target (in the direction set by `metric_direction`), the program is automatically **completed**: the `autoloop-program` label is removed and an `autoloop-completed` label is added (for issue-based programs), and the state file is marked `Completed: true`. Programs without a `target-metric` are **open-ended** and run indefinitely until manually stopped. @@ -271,6 +272,28 @@ target-metric: 0.95 ... ``` +### Metric Direction + +By default Autoloop assumes **higher is better** — `best_metric` is ratcheted up each accepted iteration, and a `target-metric` is met when `best_metric >= target-metric`. Programs whose natural fitness is *lower is better* (error, latency, cost, ratio, fitness score) can opt into reversed semantics with the optional `metric_direction` field: + +```markdown +--- +schedule: every 6h +metric_direction: lower +target-metric: 0.9 +--- +``` + +Allowed values are `higher` (default) and `lower`. Any other value is rejected at frontmatter-parse time, the scheduler logs a warning, and the program falls back to `higher`. In the example above, the program is complete when `best_metric ≤ 0.9`. Write each value alone on its line: the scheduler's line-based frontmatter parser does not strip trailing `#` comments, so an annotated value would be treated as invalid. + +When `metric_direction: lower` is set: + +- An iteration's metric is "improved" when `new_metric < best_metric` (instead of `>`). +- Iteration History entries show a `-` (negative delta = improvement) instead of `+`. +- The halting condition fires when `best_metric <= target-metric` (instead of `>=`). + +The agent reads `selected_metric_direction` from `/tmp/gh-aw/autoloop.json` to determine which direction applies to the current iteration. Programs that omit the field are treated as `higher` — no behaviour change for existing programs. + ## Program Definition Each program file defines three things: @@ -435,6 +458,12 @@ The accept path is split into three sub-steps: **5a (push and wait for CI)**, ** **Only entered if the metric improved** (or this is the first run establishing a baseline). 
+Improvement is **direction-aware**: +- If `selected_metric_direction` is `"higher"` (default): the metric improved when `new_metric > best_metric`. +- If `selected_metric_direction` is `"lower"`: the metric improved when `new_metric < best_metric`. + +Read `selected_metric_direction` from `/tmp/gh-aw/autoloop.json` to know which direction applies. The first run (no `best_metric` yet) always counts as an improvement regardless of direction. + 1. Commit the changes to the long-running branch `autoloop/{program-name}` with a commit message referencing the actions run: - Commit message subject line: `[Autoloop: {program-name}] Iteration : ` - Commit message body (after a blank line): `Run: {run_url}` referencing the GitHub Actions run URL. @@ -488,11 +517,15 @@ If `status == "failure"`, **fix and retry — do not revert, do not accept**: 4. Ensure the program issue exists (see [Program Issue](#program-issue) below) — for file-based programs that have no program issue yet (`selected_issue` is null in `/tmp/gh-aw/autoloop.json`), create one and record its number in the state file's `Issue` field. 5. Update the state file `{program-name}.md` in the repo-memory folder: - Update the **⚙️ Machine State** table: reset `consecutive_errors` to 0, set `best_metric`, increment `iteration_count`, set `last_run` to current UTC timestamp, append `"accepted"` to `recent_statuses` (keep last 10), set `paused` to false. - - Prepend an entry to **📊 Iteration History** (newest first) with status ✅, metric, PR link, the fix-attempt count if `> 0`, and a one-line summary of what changed and why it worked. + - Prepend an entry to **📊 Iteration History** (newest first) with status ✅, metric, **signed delta** (`+` for `higher`-direction programs, `-` for `lower`-direction programs — both signs point in the "improvement" direction), PR link, the fix-attempt count if `> 0`, and a one-line summary of what changed and why it worked. 
- Update **📚 Lessons Learned** if this iteration revealed something new about the problem or what works. - Update **🔭 Future Directions** if this iteration opened new promising paths. 6. **Update the program issue**: edit the status comment and post a per-iteration comment on the program issue (see [Program Issue](#program-issue)). Note the fix-attempt count in the per-iteration comment if `> 0`. -7. **Check halting condition** (see [Halting Condition](#halting-condition)): If the program has a `target-metric` in its frontmatter and the new `best_metric` meets or surpasses the target, mark the program as completed. +7. **Check halting condition** (see [Halting Condition](#halting-condition)): If the program has a `target-metric` in its frontmatter, compare the new `best_metric` against it using the program's metric direction (read `selected_metric_direction` from `/tmp/gh-aw/autoloop.json`): + - `higher`: completed when `best_metric >= target-metric`. + - `lower`: completed when `best_metric <= target-metric`. + + When the target is met, mark the program as completed (set `Completed: true`, remove the `autoloop-program` label, add `autoloop-completed`). #### Coordination with PR-health-keeper workflows @@ -607,9 +640,9 @@ Programs can be **open-ended** (run indefinitely until manually stopped) or **go 1. Parse the `target-metric` value from the program's YAML frontmatter (if present). 2. After each **accepted** iteration, compare the new `best_metric` against the `target-metric`. -3. Determine whether the target is met based on the metric direction: - - If the program says "**higher is better**": the target is met when `best_metric >= target-metric`. - - If the program says "**lower is better**": the target is met when `best_metric <= target-metric`. +3. 
Determine whether the target is met based on the program's `metric_direction` (read from `selected_metric_direction` in `/tmp/gh-aw/autoloop.json`; defaults to `higher` when unset): + - `higher` (default): the target is met when `best_metric >= target-metric`. + - `lower`: the target is met when `best_metric <= target-metric`. 4. When the target is met, **complete** the program: - Set `Completed` to `true` in the state file's **⚙️ Machine State** table. - Set `Completed Reason` to a human-readable message (e.g., `target metric 0.95 reached with value 0.97`). @@ -705,6 +738,7 @@ When creating or updating a program's state file in the repo-memory folder, use | Iteration Count | 0 | | Best Metric | — | | Target Metric | — | +| Metric Direction | higher | | Branch | `autoloop/{program-name}` | | PR | — | | Issue | — | @@ -776,6 +810,7 @@ All iterations in reverse chronological order (newest first). | Iteration Count | integer | Total iterations completed | | Best Metric | number | Best metric value achieved so far | | Target Metric | number or `—` | Target metric from program frontmatter (halting condition). `—` if open-ended | +| Metric Direction | `higher` or `lower` | Whether larger or smaller metric values count as improvement. Defaults to `higher` if absent (back-compat). Set from the program's `metric_direction` frontmatter field. | | Branch | branch name | Long-running branch: `autoloop/{program-name}` | | PR | `#number` or `—` | Draft PR number for this program | | Issue | `#number` or `—` | The single program issue (`[Autoloop: {program-name}]`) for this program. Hosts the status comment, per-iteration comments, and human steering comments. 
| @@ -795,12 +830,14 @@ After each iteration, prepend an entry to the **📊 Iteration History** section - **Status**: ✅ Accepted / ❌ Rejected / ⚠️ Error - **Change**: {one-line description of what was tried} -- **Metric**: {value} (previous best: {previous_best}, delta: {+/-delta}) +- **Metric**: {value} (previous best: {previous_best}, delta: {signed-delta}) - **Commit**: {short_sha} *(if accepted)* - **CI fix attempts**: {N} *(omit if 0; only present for accepted iterations that needed fix-and-retry)* - **Notes**: {one or two sentences on what this iteration revealed} ``` +The `delta` is **signed by metric direction**: for `higher`-direction programs an improvement is `+`; for `lower`-direction programs an improvement is `-`. In both cases the sign points in the "improvement" direction so the entry reads naturally. + ### Update Rules - **Always** read the state file before proposing a change. It contains human guidance you must follow. diff --git a/workflows/scripts/autoloop_scheduler.py b/workflows/scripts/autoloop_scheduler.py index 827e3f7..40ac1d9 100644 --- a/workflows/scripts/autoloop_scheduler.py +++ b/workflows/scripts/autoloop_scheduler.py @@ -155,9 +155,14 @@ def parse_link_header(header): def parse_program_frontmatter(content): - """Parse optional YAML frontmatter for ``schedule`` and ``target-metric``. + """Parse optional YAML frontmatter for ``schedule``, ``target-metric``, and ``metric_direction``. - Returns ``(schedule_delta, target_metric, target_metric_invalid_value)``. + Returns ``(schedule_delta, target_metric, target_metric_invalid_value, + metric_direction, metric_direction_invalid_value)``. + + ``metric_direction`` is one of ``"higher"`` (default) or ``"lower"``. + Invalid values fall back to ``"higher"`` and the raw string is returned in + the fifth element so the caller can warn. The third element is the raw string of an invalid ``target-metric`` value (so the caller can warn), or ``None`` when the value parsed cleanly or was absent. 
@@ -167,20 +172,41 @@ def parse_program_frontmatter(content): schedule_delta = None target_metric = None target_metric_invalid = None + metric_direction = "higher" + metric_direction_invalid = None fm_match = re.match(r"^---\s*\n(.*?)\n---\s*\n", content_stripped, re.DOTALL) if not fm_match: - return schedule_delta, target_metric, target_metric_invalid + return ( + schedule_delta, + target_metric, + target_metric_invalid, + metric_direction, + metric_direction_invalid, + ) for line in fm_match.group(1).split("\n"): - if line.strip().startswith("schedule:"): + stripped = line.strip() + if stripped.startswith("schedule:"): schedule_str = line.split(":", 1)[1].strip() schedule_delta = parse_schedule(schedule_str) - if line.strip().startswith("target-metric:"): + if stripped.startswith("target-metric:"): raw = line.split(":", 1)[1].strip() try: target_metric = float(raw) except (ValueError, TypeError): target_metric_invalid = raw - return schedule_delta, target_metric, target_metric_invalid + if stripped.startswith("metric_direction:") or stripped.startswith("metric-direction:"): + raw = line.split(":", 1)[1].strip().strip('"').strip("'").lower() + if raw in ("higher", "lower"): + metric_direction = raw + else: + metric_direction_invalid = raw + return ( + schedule_delta, + target_metric, + target_metric_invalid, + metric_direction, + metric_direction_invalid, + ) def is_unconfigured(content): @@ -363,12 +389,22 @@ def _parse_target_metric_from_file(path): """Re-parse a program file to extract its ``target-metric``, if any.""" try: with open(path) as f: - _, target_metric, _ = parse_program_frontmatter(f.read()) + _, target_metric, _, _, _ = parse_program_frontmatter(f.read()) return target_metric except (OSError, ValueError, TypeError): return None +def _parse_metric_direction_from_file(path): + """Re-parse a program file to extract its ``metric_direction`` (default ``"higher"``).""" + try: + with open(path) as f: + _, _, _, direction, _ = 
parse_program_frontmatter(f.read()) + return direction or "higher" + except (OSError, ValueError, TypeError): + return "higher" + + # --------------------------------------------------------------------------- # Existing PR lookup (single-PR-per-program invariant) # --------------------------------------------------------------------------- @@ -459,8 +495,10 @@ def select_program(due, forced_program=None, all_programs=None, unconfigured=Non """Pick the program to run. Returns ``(selected, selected_file, selected_issue, selected_target_metric, - deferred, error)``. ``error`` is a string describing why a forced selection - failed (and the caller should ``sys.exit(1)``); otherwise it is ``None``. + selected_metric_direction, deferred, error)``. ``error`` is a string describing + why a forced selection failed (and the caller should ``sys.exit(1)``); + otherwise it is ``None``. ``selected_metric_direction`` is one of + ``"higher"`` (default) or ``"lower"``. """ all_programs = all_programs or {} unconfigured = unconfigured or [] @@ -468,14 +506,14 @@ def select_program(due, forced_program=None, all_programs=None, unconfigured=Non if forced_program: if forced_program not in all_programs: return ( - None, None, None, None, [], + None, None, None, None, "higher", [], "requested program '{}' not found. 
Available programs: {}".format( forced_program, list(all_programs.keys()) ), ) if forced_program in unconfigured: return ( - None, None, None, None, [], + None, None, None, None, "higher", [], "requested program '{}' is unconfigured (has placeholders).".format( forced_program ), @@ -487,13 +525,25 @@ def select_program(due, forced_program=None, all_programs=None, unconfigured=Non issue_programs[selected]["issue_number"] if selected in issue_programs else None ) selected_target_metric = None + selected_metric_direction = None for p in due: if p["name"] == forced_program: selected_target_metric = p.get("target_metric") + selected_metric_direction = p.get("metric_direction") break if selected_target_metric is None: selected_target_metric = _parse_target_metric_from_file(selected_file) - return selected, selected_file, selected_issue, selected_target_metric, deferred, None + if selected_metric_direction is None: + selected_metric_direction = _parse_metric_direction_from_file(selected_file) + return ( + selected, + selected_file, + selected_issue, + selected_target_metric, + selected_metric_direction, + deferred, + None, + ) if due: # Normal scheduling: pick the single most-overdue program. 
@@ -502,13 +552,22 @@ def select_program(due, forced_program=None, all_programs=None, unconfigured=Non selected = due_sorted[0]["name"] selected_file = due_sorted[0]["file"] selected_target_metric = due_sorted[0].get("target_metric") + selected_metric_direction = due_sorted[0].get("metric_direction") or "higher" deferred = [p["name"] for p in due_sorted[1:]] selected_issue = ( issue_programs[selected]["issue_number"] if selected in issue_programs else None ) - return selected, selected_file, selected_issue, selected_target_metric, deferred, None + return ( + selected, + selected_file, + selected_issue, + selected_target_metric, + selected_metric_direction, + deferred, + None, + ) - return None, None, None, None, [], None + return None, None, None, None, "higher", [], None # --------------------------------------------------------------------------- @@ -574,9 +633,15 @@ def main(): unconfigured.append(name) continue - schedule_delta, target_metric, invalid_target = parse_program_frontmatter(content) + schedule_delta, target_metric, invalid_target, metric_direction, invalid_direction = parse_program_frontmatter(content) if invalid_target is not None: print(" Warning: {} has invalid target-metric value: {}".format(name, invalid_target)) + if invalid_direction is not None: + print( + " Warning: {} has invalid metric_direction value: {!r} (must be 'higher' or 'lower'); defaulting to 'higher'".format( + name, invalid_direction + ) + ) # Read state from repo-memory state = read_program_state(name) @@ -613,9 +678,15 @@ def main(): ) continue - due.append({"name": name, "last_run": lr, "file": pf, "target_metric": target_metric}) + due.append({ + "name": name, + "last_run": lr, + "file": pf, + "target_metric": target_metric, + "metric_direction": metric_direction, + }) - selected, selected_file, selected_issue, selected_target_metric, deferred, error = ( + selected, selected_file, selected_issue, selected_target_metric, selected_metric_direction, deferred, error = ( 
select_program(due, forced_program, all_programs, unconfigured, issue_programs) ) @@ -645,6 +716,7 @@ def main(): "selected_file": selected_file, "selected_issue": selected_issue, "selected_target_metric": selected_target_metric, + "selected_metric_direction": selected_metric_direction, "state_file_size_bytes": get_state_file_size(selected) if selected else 0, "state_file_max_bytes": STATE_FILE_MAX_BYTES, "issue_programs": {