From 894a26aa53446041b6d4276b3394b8f481030e18 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 22 Apr 2026 22:27:04 +0000 Subject: [PATCH 1/3] Initial plan From ff9a65de5f8af98469f2d8a538c45e1946a1799e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 22 Apr 2026 22:39:12 +0000 Subject: [PATCH 2/3] Add max-file-size to repo-memory and rolling-compaction rule for state files Agent-Logs-Url: https://github.com/githubnext/autoloop/sessions/149e05de-84fa-4e41-9a14-cf8948cdb980 Co-authored-by: mrjf <180956+mrjf@users.noreply.github.com> --- .github/workflows/autoloop.lock.yml | 40 ++++++++++++++--------------- workflows/autoloop.md | 29 +++++++++++++++++++++ 2 files changed, 49 insertions(+), 20 deletions(-) diff --git a/.github/workflows/autoloop.lock.yml b/.github/workflows/autoloop.lock.yml index f9dd301..cde11b7 100644 --- a/.github/workflows/autoloop.lock.yml +++ b/.github/workflows/autoloop.lock.yml @@ -23,7 +23,7 @@ # An iterative optimization loop inspired by Karpathy's Autoresearch and Claude Code's /loop. # Runs on a configurable schedule to autonomously improve a target artifact toward a measurable goal. # Each iteration: reads the program definition, proposes a change, evaluates against a metric, -# and accepts or rejects the change. Tracks all iterations in a rolling GitHub issue. +# and accepts or rejects the change. 
# - User defines the optimization goal and evaluation criteria in a program.md file # - Accepts changes only when they improve the metric (ratchet pattern) # - Persists all state via repo-memory (human-readable, human-editable) @@ -36,7 +36,7 @@ # Imports: # - shared/reporting.md # -# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"c63aa1e1ccbd4cd47d9d18200ed0141d013ea22a3ae98d3efae2563cfda919c4","compiler_version":"v0.65.6","strict":true,"agent_id":"copilot"} +# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"19098fa82e0ab9ec831e8d8a107d81912482aaeed95b6dae5c800aab39054281","compiler_version":"v0.65.6","strict":true,"agent_id":"copilot"} name: "Autoloop" "on": @@ -227,21 +227,21 @@ jobs: run: | bash ${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh { - cat << 'GH_AW_PROMPT_25eda8a50024125e_EOF' + cat << 'GH_AW_PROMPT_11e5988a560a57a3_EOF' - GH_AW_PROMPT_25eda8a50024125e_EOF + GH_AW_PROMPT_11e5988a560a57a3_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md" cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md" cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md" cat "${RUNNER_TEMP}/gh-aw/prompts/repo_memory_prompt.md" cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md" - cat << 'GH_AW_PROMPT_25eda8a50024125e_EOF' + cat << 'GH_AW_PROMPT_11e5988a560a57a3_EOF' - Tools: add_comment(max:7), create_issue(max:1), update_issue(max:3), create_pull_request(max:2), add_labels(max:2), remove_labels(max:2), push_to_pull_request_branch(max:2), missing_tool, missing_data, noop - GH_AW_PROMPT_25eda8a50024125e_EOF + Tools: add_comment(max:7), create_issue, update_issue(max:3), create_pull_request(max:2), add_labels(max:2), remove_labels(max:2), push_to_pull_request_branch(max:2), missing_tool, missing_data, noop + GH_AW_PROMPT_11e5988a560a57a3_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_create_pull_request.md" cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_push_to_pr_branch.md" - cat << 'GH_AW_PROMPT_25eda8a50024125e_EOF' + cat << 
'GH_AW_PROMPT_11e5988a560a57a3_EOF' The following GitHub context information is available for this workflow: @@ -274,7 +274,7 @@ jobs: - **Note**: If a branch you need is not in the list above and is not listed as an additional fetched ref, it has NOT been checked out. For private repositories you cannot fetch it without proper authentication. If the branch is required and not available, exit with an error and ask the user to add it to the `fetch:` option of the `checkout:` configuration (e.g., `fetch: ["refs/pulls/open/*"]` for all open PR refs, or `fetch: ["main", "feature/my-branch"]` for specific branches). - GH_AW_PROMPT_25eda8a50024125e_EOF + GH_AW_PROMPT_11e5988a560a57a3_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md" if [ "$GITHUB_EVENT_NAME" = "issue_comment" ] && [ -n "$GH_AW_IS_PR_COMMENT" ] || [ "$GITHUB_EVENT_NAME" = "pull_request_review_comment" ] || [ "$GITHUB_EVENT_NAME" = "pull_request_review" ]; then cat "${RUNNER_TEMP}/gh-aw/prompts/pr_context_prompt.md" @@ -282,11 +282,11 @@ jobs: if [ "$GITHUB_EVENT_NAME" = "issue_comment" ] && [ -n "$GH_AW_IS_PR_COMMENT" ] || [ "$GITHUB_EVENT_NAME" = "pull_request_review_comment" ] || [ "$GITHUB_EVENT_NAME" = "pull_request_review" ]; then cat "${RUNNER_TEMP}/gh-aw/prompts/pr_context_push_to_pr_branch_guidance.md" fi - cat << 'GH_AW_PROMPT_25eda8a50024125e_EOF' + cat << 'GH_AW_PROMPT_11e5988a560a57a3_EOF' {{#runtime-import .github/workflows/shared/reporting.md}} {{#runtime-import .github/workflows/autoloop.md}} - GH_AW_PROMPT_25eda8a50024125e_EOF + GH_AW_PROMPT_11e5988a560a57a3_EOF } > "$GH_AW_PROMPT" - name: Interpolate variables and render templates uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 @@ -488,12 +488,12 @@ jobs: mkdir -p ${RUNNER_TEMP}/gh-aw/safeoutputs mkdir -p /tmp/gh-aw/safeoutputs mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs - cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_4cb88c61a21f151b_EOF' + cat > 
${RUNNER_TEMP}/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_b0478df8124c275d_EOF' {"add_comment":{"hide_older_comments":false,"max":7,"target":"*"},"add_labels":{"max":2,"target":"*"},"create_issue":{"labels":["automation","autoloop"],"max":1,"title_prefix":"[Autoloop] "},"create_pull_request":{"draft":true,"labels":["automation","autoloop"],"max":2,"max_patch_size":1024,"protected_files":["package.json","bun.lockb","bunfig.toml","deno.json","deno.jsonc","deno.lock","global.json","NuGet.Config","Directory.Packages.props","mix.exs","mix.lock","go.mod","go.sum","stack.yaml","stack.yaml.lock","pom.xml","build.gradle","build.gradle.kts","settings.gradle","settings.gradle.kts","gradle.properties","package-lock.json","yarn.lock","pnpm-lock.yaml","npm-shrinkwrap.json","requirements.txt","Pipfile","Pipfile.lock","pyproject.toml","setup.py","setup.cfg","Gemfile","Gemfile.lock","uv.lock","CODEOWNERS"],"protected_files_policy":"fallback-to-issue","protected_path_prefixes":[".github/",".agents/"],"title_prefix":"[Autoloop] "},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"true"},"push_repo_memory":{"memories":[{"dir":"/tmp/gh-aw/repo-memory/default","id":"default","max_file_count":100,"max_file_size":10240,"max_patch_size":10240}]},"push_to_pull_request_branch":{"if_no_changes":"warn","max":2,"max_patch_size":1024,"protected_files":["package.json","bun.lockb","bunfig.toml","deno.json","deno.jsonc","deno.lock","global.json","NuGet.Config","Directory.Packages.props","mix.exs","mix.lock","go.mod","go.sum","stack.yaml","stack.yaml.lock","pom.xml","build.gradle","build.gradle.kts","settings.gradle","settings.gradle.kts","gradle.properties","package-lock.json","yarn.lock","pnpm-lock.yaml","npm-shrinkwrap.json","requirements.txt","Pipfile","Pipfile.lock","pyproject.toml","setup.py","setup.cfg","Gemfile","Gemfile.lock","uv.lock","CODEOWNERS"],"protected_path_prefixes":[".github/",".agents/"],"target":"*","title_prefix":"[Autoloop] 
"},"remove_labels":{"max":2,"target":"*"},"update_issue":{"allow_body":true,"max":3,"target":"*","title_prefix":"[Autoloop] "}} - GH_AW_SAFE_OUTPUTS_CONFIG_4cb88c61a21f151b_EOF + GH_AW_SAFE_OUTPUTS_CONFIG_b0478df8124c275d_EOF - name: Write Safe Outputs Tools run: | - cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/tools_meta.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_META_e890c86d61e62c6e_EOF' + cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/tools_meta.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_META_e083150970ed99c8_EOF' { "description_suffixes": { "add_comment": " CONSTRAINTS: Maximum 7 comment(s) can be added. Target: *.", @@ -507,8 +507,8 @@ jobs: "repo_params": {}, "dynamic_tools": [] } - GH_AW_SAFE_OUTPUTS_TOOLS_META_e890c86d61e62c6e_EOF - cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_713ceb1e276ac547_EOF' + GH_AW_SAFE_OUTPUTS_TOOLS_META_e083150970ed99c8_EOF + cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_5352c6397ffb4603_EOF' { "add_comment": { "defaultMax": 1, @@ -767,7 +767,7 @@ jobs: "customValidation": "requiresOneOf:status,title,body" } } - GH_AW_SAFE_OUTPUTS_VALIDATION_713ceb1e276ac547_EOF + GH_AW_SAFE_OUTPUTS_VALIDATION_5352c6397ffb4603_EOF node ${RUNNER_TEMP}/gh-aw/actions/generate_safe_outputs_tools.cjs - name: Generate Safe Outputs MCP Server Config id: safe-outputs-config @@ -837,7 +837,7 @@ jobs: export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL 
-e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.11' mkdir -p /home/runner/.copilot - cat << GH_AW_MCP_CONFIG_d7bba620cd55956d_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh + cat << GH_AW_MCP_CONFIG_5d2d4ce1ccd1824d_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh { "mcpServers": { "github": { @@ -878,7 +878,7 @@ jobs: "payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}" } } - GH_AW_MCP_CONFIG_d7bba620cd55956d_EOF + GH_AW_MCP_CONFIG_5d2d4ce1ccd1824d_EOF - name: Download activation artifact uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: @@ -1543,7 +1543,7 @@ jobs: GH_AW_ALLOWED_DOMAINS: 
"*.gradle-enterprise.cloud,*.jsr.io,*.pythonhosted.org,*.vsblob.vsassets.io,adoptium.net,anaconda.org,api.adoptium.net,api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.foojay.io,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.npms.io,api.nuget.org,api.snapcraft.io,archive.apache.org,archive.ubuntu.com,azure.archive.ubuntu.com,azuresearch-usnc.nuget.org,azuresearch-ussc.nuget.org,binstar.org,bootstrap.pypa.io,builds.dotnet.microsoft.com,bun.sh,cdn.azul.com,cdn.jsdelivr.net,central.sonatype.com,ci.dot.net,conda.anaconda.org,conda.binstar.org,crates.io,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,dc.services.visualstudio.com,deb.nodesource.com,deno.land,develocity.apache.org,dist.nuget.org,dl.google.com,dlcdn.apache.org,dot.net,dotnet.microsoft.com,dotnetcli.blob.core.windows.net,download.eclipse.org,download.java.net,download.oracle.com,downloads.gradle-dn.com,esm.sh,files.pythonhosted.org,ge.spockframework.org,get.pnpm.io,github.com,googleapis.deno.dev,googlechromelabs.github.io,gradle.org,host.docker.internal,index.crates.io,jcenter.bintray.com,jdk.java.net,json-schema.org,json.schemastore.org,jsr.io,keyserver.ubuntu.com,maven-central.storage-download.googleapis.com,maven.apache.org,maven.google.com,maven.oracle.com,maven.pkg.github.com,nodejs.org,npm.pkg.github.com,npmjs.com,npmjs.org,nuget.org,nuget.pkg.github.com,nugetregistryv2prod.blob.core.windows.net,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,oneocsp.microsoft.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,pip.pypa.io,pkgs.dev.azure.com,plugins-artifacts.gradle.org,plugins.gradle.org,ppa.launchpad.net,pypi.org,pypi.python.org,raw.githubusercontent.com,registry.bower.io,registry.npmjs.com,registry.npmjs.org,re
gistry.yarnpkg.com,repo.anaconda.com,repo.continuum.io,repo.gradle.org,repo.grails.org,repo.maven.apache.org,repo.spring.io,repo.yarnpkg.com,repo1.maven.org,repository.apache.org,s.symcb.com,s.symcd.com,scans-in.gradle.com,security.ubuntu.com,services.gradle.org,sh.rustup.rs,skimdb.npmjs.com,static.crates.io,static.rust-lang.org,storage.googleapis.com,telemetry.enterprise.githubcopilot.com,telemetry.vercel.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com,www.java.com,www.microsoft.com,www.npmjs.com,www.npmjs.org,yarnpkg.com" GITHUB_SERVER_URL: ${{ github.server_url }} GITHUB_API_URL: ${{ github.api_url }} - GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"add_comment\":{\"hide_older_comments\":false,\"max\":7,\"target\":\"*\"},\"add_labels\":{\"max\":2,\"target\":\"*\"},\"create_issue\":{\"labels\":[\"automation\",\"autoloop\"],\"max\":2,\"title_prefix\":\"[Autoloop] \"},\"create_pull_request\":{\"draft\":true,\"labels\":[\"automation\",\"autoloop\"],\"max\":2,\"max_patch_size\":1024,\"protected_files\":[\"package.json\",\"bun.lockb\",\"bunfig.toml\",\"deno.json\",\"deno.jsonc\",\"deno.lock\",\"global.json\",\"NuGet.Config\",\"Directory.Packages.props\",\"mix.exs\",\"mix.lock\",\"go.mod\",\"go.sum\",\"stack.yaml\",\"stack.yaml.lock\",\"pom.xml\",\"build.gradle\",\"build.gradle.kts\",\"settings.gradle\",\"settings.gradle.kts\",\"gradle.properties\",\"package-lock.json\",\"yarn.lock\",\"pnpm-lock.yaml\",\"npm-shrinkwrap.json\",\"requirements.txt\",\"Pipfile\",\"Pipfile.lock\",\"pyproject.toml\",\"setup.py\",\"setup.cfg\",\"Gemfile\",\"Gemfile.lock\",\"uv.lock\",\"CODEOWNERS\",\"AGENTS.md\"],\"protected_files_policy\":\"fallback-to-issue\",\"protected_path_prefixes\":[\".github/\",\".agents/\"],\"title_prefix\":\"[Autoloop] 
\"},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"true\"},\"push_to_pull_request_branch\":{\"if_no_changes\":\"warn\",\"max\":2,\"max_patch_size\":1024,\"protected_files\":[\"package.json\",\"bun.lockb\",\"bunfig.toml\",\"deno.json\",\"deno.jsonc\",\"deno.lock\",\"global.json\",\"NuGet.Config\",\"Directory.Packages.props\",\"mix.exs\",\"mix.lock\",\"go.mod\",\"go.sum\",\"stack.yaml\",\"stack.yaml.lock\",\"pom.xml\",\"build.gradle\",\"build.gradle.kts\",\"settings.gradle\",\"settings.gradle.kts\",\"gradle.properties\",\"package-lock.json\",\"yarn.lock\",\"pnpm-lock.yaml\",\"npm-shrinkwrap.json\",\"requirements.txt\",\"Pipfile\",\"Pipfile.lock\",\"pyproject.toml\",\"setup.py\",\"setup.cfg\",\"Gemfile\",\"Gemfile.lock\",\"uv.lock\",\"CODEOWNERS\",\"AGENTS.md\"],\"protected_path_prefixes\":[\".github/\",\".agents/\"],\"target\":\"*\",\"title_prefix\":\"[Autoloop] \"},\"remove_labels\":{\"max\":2,\"target\":\"*\"},\"update_issue\":{\"allow_body\":true,\"max\":3,\"target\":\"*\",\"title_prefix\":\"[Autoloop] \"}}" + GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"add_comment\":{\"hide_older_comments\":false,\"max\":7,\"target\":\"*\"},\"add_labels\":{\"max\":2,\"target\":\"*\"},\"create_issue\":{\"labels\":[\"automation\",\"autoloop\"],\"max\":1,\"title_prefix\":\"[Autoloop] 
\"},\"create_pull_request\":{\"draft\":true,\"labels\":[\"automation\",\"autoloop\"],\"max\":2,\"max_patch_size\":1024,\"protected_files\":[\"package.json\",\"bun.lockb\",\"bunfig.toml\",\"deno.json\",\"deno.jsonc\",\"deno.lock\",\"global.json\",\"NuGet.Config\",\"Directory.Packages.props\",\"mix.exs\",\"mix.lock\",\"go.mod\",\"go.sum\",\"stack.yaml\",\"stack.yaml.lock\",\"pom.xml\",\"build.gradle\",\"build.gradle.kts\",\"settings.gradle\",\"settings.gradle.kts\",\"gradle.properties\",\"package-lock.json\",\"yarn.lock\",\"pnpm-lock.yaml\",\"npm-shrinkwrap.json\",\"requirements.txt\",\"Pipfile\",\"Pipfile.lock\",\"pyproject.toml\",\"setup.py\",\"setup.cfg\",\"Gemfile\",\"Gemfile.lock\",\"uv.lock\",\"CODEOWNERS\",\"AGENTS.md\"],\"protected_files_policy\":\"fallback-to-issue\",\"protected_path_prefixes\":[\".github/\",\".agents/\"],\"title_prefix\":\"[Autoloop] \"},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"true\"},\"push_to_pull_request_branch\":{\"if_no_changes\":\"warn\",\"max\":2,\"max_patch_size\":1024,\"protected_files\":[\"package.json\",\"bun.lockb\",\"bunfig.toml\",\"deno.json\",\"deno.jsonc\",\"deno.lock\",\"global.json\",\"NuGet.Config\",\"Directory.Packages.props\",\"mix.exs\",\"mix.lock\",\"go.mod\",\"go.sum\",\"stack.yaml\",\"stack.yaml.lock\",\"pom.xml\",\"build.gradle\",\"build.gradle.kts\",\"settings.gradle\",\"settings.gradle.kts\",\"gradle.properties\",\"package-lock.json\",\"yarn.lock\",\"pnpm-lock.yaml\",\"npm-shrinkwrap.json\",\"requirements.txt\",\"Pipfile\",\"Pipfile.lock\",\"pyproject.toml\",\"setup.py\",\"setup.cfg\",\"Gemfile\",\"Gemfile.lock\",\"uv.lock\",\"CODEOWNERS\",\"AGENTS.md\"],\"protected_path_prefixes\":[\".github/\",\".agents/\"],\"target\":\"*\",\"title_prefix\":\"[Autoloop] \"},\"remove_labels\":{\"max\":2,\"target\":\"*\"},\"update_issue\":{\"allow_body\":true,\"max\":3,\"target\":\"*\",\"title_prefix\":\"[Autoloop] \"}}" GH_AW_CI_TRIGGER_TOKEN: ${{ secrets.GH_AW_CI_TRIGGER_TOKEN }} with: 
github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} diff --git a/workflows/autoloop.md b/workflows/autoloop.md index 52c94a7..f9f4a4c 100644 --- a/workflows/autoloop.md +++ b/workflows/autoloop.md @@ -76,6 +76,11 @@ tools: repo-memory: branch-name: memory/autoloop file-glob: ["*.md"] + # 30 KB per state file -- enough for the structured sections plus ~10 most-recent + # iteration entries plus ~5 compressed-range summaries. The rolling-compaction + # rule in "Update Rules" below keeps files under this budget. Tune up for + # short-cadence programs (e.g. `every 5m`); tune down for daily-cadence ones. + max-file-size: 30720 imports: - shared/reporting.md @@ -179,6 +184,19 @@ steps: return parseMachineState(content); } + function getStateFileSize(programName) { + // Returns the size of the program's state file in bytes, or 0 if it + // does not exist. Surfaced in autoloop.json so the agent can decide + // whether to compact aggressively this iteration. + const stateFile = path.join(repoMemoryDir, programName + '.md'); + try { + const st = fs.statSync(stateFile); + return st.isFile() ? st.size : 0; + } catch (e) { + return 0; + } + } + // Schedule string to milliseconds function parseSchedule(s) { s = s.trim().toLowerCase(); @@ -547,6 +565,8 @@ steps: selected_file: selectedFile, selected_issue: selectedIssue, selected_target_metric: selectedTargetMetric, + state_file_size_bytes: selected ? getStateFileSize(selected) : 0, + state_file_max_bytes: 30720, issue_programs: issueProgramsMap, deferred: deferred, skipped: skipped, @@ -650,6 +670,8 @@ The pre-step has already determined which program to run. Read `/tmp/gh-aw/autol - **`selected_file`**: The full path to the program's markdown file (either `.autoloop/programs//program.md`, `.autoloop/programs/.md`, or `/tmp/gh-aw/issue-programs/.md` for issue-based programs). - **`selected_issue`**: The GitHub issue number if the selected program came from an issue, or `null` if it came from a file. 
- **`selected_target_metric`**: The `target-metric` value from the program's frontmatter (a number), or `null` if the program is open-ended. Used to check the [halting condition](#halting-condition) after each accepted iteration. +- **`state_file_size_bytes`**: Current size of the selected program's state file in bytes (0 if it does not exist yet). Use this together with `state_file_max_bytes` to decide whether to compact aggressively this iteration (see [Update Rules](#update-rules) — when size exceeds 80% of the max, collapse older iteration entries). +- **`state_file_max_bytes`**: The configured `max-file-size` for repo-memory state files (default `30720`, i.e. 30 KB). Files larger than this are rejected by repo-memory, breaking scheduling. - **`issue_programs`**: A mapping of program name → issue number for all discovered issue-based programs. - **`deferred`**: Other programs that were due but will be handled in future runs. - **`unconfigured`**: Programs that still have the sentinel or placeholder content. @@ -1163,6 +1185,13 @@ After each iteration, prepend an entry to the **📊 Iteration History** section - **Add to Foreclosed Avenues** only when an approach is conclusively ruled out (not just rejected once). - **Respect Current Priorities** — if a maintainer has written priorities, follow them in your next proposal. - **Write the state file** to the repo-memory folder. Changes are automatically committed and pushed to the `memory/autoloop` branch after the workflow completes. +- **Keep the state file compact.** The state file must stay under the configured `max-file-size` (default 30 KB — see `state_file_max_bytes` in `/tmp/gh-aw/autoloop.json`). When prepending a new iteration entry, collapse older iteration entries (beyond the most recent 10) into compressed summary lines. 
Example format for collapsed entries: + + ```markdown + ### Iters 50–100 — ✅ (metrics 20→55): brief summary of what worked across this range + ``` + + Also prune **📚 Lessons Learned** to the most recent and most relevant entries, and consolidate similar entries in **🚧 Foreclosed Avenues** if it grows beyond a page. If `state_file_size_bytes` from `/tmp/gh-aw/autoloop.json` is already greater than 80% of `state_file_max_bytes`, **compact aggressively** this iteration: collapse to the most recent 5 detailed entries and merge older compressed ranges into broader bands. Repo-memory rejects files larger than `max-file-size`, which breaks scheduling — so keeping the file under budget is mandatory, not optional. ## Guidelines From 522c525e3921cdf03f99119af125a6b105348e3c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Apr 2026 03:05:00 +0000 Subject: [PATCH 3/3] Merge origin/main and re-apply max-file-size + compaction changes to Python scheduler Agent-Logs-Url: https://github.com/githubnext/autoloop/sessions/705c2b0a-fd32-4450-bddb-a715daf0dce1 Co-authored-by: mrjf <180956+mrjf@users.noreply.github.com> --- .github/workflows/autoloop.lock.yml | 40 +- .github/workflows/autoloop.md | 465 ++--------- .../workflows/scripts/autoloop_scheduler.py | 676 ++++++++++++++++ .github/workflows/sync-branches.lock.yml | 20 +- .github/workflows/sync-branches.md | 75 +- .github/workflows/tests.yml | 2 +- AGENTS.md | 19 +- tests/conftest.py | 133 +--- tests/test_scheduler_e2e.py | 280 +++++++ tests/test_scheduling.py | 352 ++++++--- workflows/autoloop.md | 724 +++++------------- workflows/scripts/autoloop_scheduler.py | 676 ++++++++++++++++ workflows/sync-branches.md | 62 +- 13 files changed, 2298 insertions(+), 1226 deletions(-) create mode 100644 .github/workflows/scripts/autoloop_scheduler.py create mode 100644 tests/test_scheduler_e2e.py create mode 100644 workflows/scripts/autoloop_scheduler.py diff --git 
a/.github/workflows/autoloop.lock.yml b/.github/workflows/autoloop.lock.yml index cde11b7..3cd4ee4 100644 --- a/.github/workflows/autoloop.lock.yml +++ b/.github/workflows/autoloop.lock.yml @@ -36,7 +36,7 @@ # Imports: # - shared/reporting.md # -# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"19098fa82e0ab9ec831e8d8a107d81912482aaeed95b6dae5c800aab39054281","compiler_version":"v0.65.6","strict":true,"agent_id":"copilot"} +# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"261bf9da368e23a161bfa806b9ff11c0b6e8777ac493b6e8ac0a7586b6a046b5","compiler_version":"v0.65.6","strict":true,"agent_id":"copilot"} name: "Autoloop" "on": @@ -227,21 +227,21 @@ jobs: run: | bash ${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh { - cat << 'GH_AW_PROMPT_11e5988a560a57a3_EOF' + cat << 'GH_AW_PROMPT_0171e714d32c535f_EOF' - GH_AW_PROMPT_11e5988a560a57a3_EOF + GH_AW_PROMPT_0171e714d32c535f_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md" cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md" cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md" cat "${RUNNER_TEMP}/gh-aw/prompts/repo_memory_prompt.md" cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md" - cat << 'GH_AW_PROMPT_11e5988a560a57a3_EOF' + cat << 'GH_AW_PROMPT_0171e714d32c535f_EOF' Tools: add_comment(max:7), create_issue, update_issue(max:3), create_pull_request(max:2), add_labels(max:2), remove_labels(max:2), push_to_pull_request_branch(max:2), missing_tool, missing_data, noop - GH_AW_PROMPT_11e5988a560a57a3_EOF + GH_AW_PROMPT_0171e714d32c535f_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_create_pull_request.md" cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_push_to_pr_branch.md" - cat << 'GH_AW_PROMPT_11e5988a560a57a3_EOF' + cat << 'GH_AW_PROMPT_0171e714d32c535f_EOF' The following GitHub context information is available for this workflow: @@ -274,7 +274,7 @@ jobs: - **Note**: If a branch you need is not in the list above and is not listed as an additional fetched ref, it has NOT been checked out. 
For private repositories you cannot fetch it without proper authentication. If the branch is required and not available, exit with an error and ask the user to add it to the `fetch:` option of the `checkout:` configuration (e.g., `fetch: ["refs/pulls/open/*"]` for all open PR refs, or `fetch: ["main", "feature/my-branch"]` for specific branches). - GH_AW_PROMPT_11e5988a560a57a3_EOF + GH_AW_PROMPT_0171e714d32c535f_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md" if [ "$GITHUB_EVENT_NAME" = "issue_comment" ] && [ -n "$GH_AW_IS_PR_COMMENT" ] || [ "$GITHUB_EVENT_NAME" = "pull_request_review_comment" ] || [ "$GITHUB_EVENT_NAME" = "pull_request_review" ]; then cat "${RUNNER_TEMP}/gh-aw/prompts/pr_context_prompt.md" @@ -282,11 +282,11 @@ jobs: if [ "$GITHUB_EVENT_NAME" = "issue_comment" ] && [ -n "$GH_AW_IS_PR_COMMENT" ] || [ "$GITHUB_EVENT_NAME" = "pull_request_review_comment" ] || [ "$GITHUB_EVENT_NAME" = "pull_request_review" ]; then cat "${RUNNER_TEMP}/gh-aw/prompts/pr_context_push_to_pr_branch_guidance.md" fi - cat << 'GH_AW_PROMPT_11e5988a560a57a3_EOF' + cat << 'GH_AW_PROMPT_0171e714d32c535f_EOF' {{#runtime-import .github/workflows/shared/reporting.md}} {{#runtime-import .github/workflows/autoloop.md}} - GH_AW_PROMPT_11e5988a560a57a3_EOF + GH_AW_PROMPT_0171e714d32c535f_EOF } > "$GH_AW_PROMPT" - name: Interpolate variables and render templates uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 @@ -418,6 +418,10 @@ jobs: run: | header=$(printf "x-access-token:%s" "${GH_AW_FETCH_TOKEN}" | base64 -w 0) git -c "http.extraheader=Authorization: Basic ${header}" fetch origin '+refs/heads/*:refs/remotes/origin/*' + - name: Setup Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: '3.12' - name: Create gh-aw temp directory run: bash ${RUNNER_TEMP}/gh-aw/actions/create_gh_aw_tmp_dir.sh - name: Configure gh CLI for GitHub Enterprise @@ -429,7 +433,7 @@ jobs: 
GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ github.token }} name: Check which programs are due - run: "python3 - << 'PYEOF'\nimport os, json, re, glob, sys\nimport urllib.request, urllib.error\nfrom datetime import datetime, timezone, timedelta\n\nprograms_dir = \".autoloop/programs\"\nautoloop_dir = \".autoloop/programs\"\ntemplate_file = os.path.join(autoloop_dir, \"example.md\")\n\n# Read program state from repo-memory (persistent git-backed storage)\ngithub_token = os.environ.get(\"GITHUB_TOKEN\", \"\")\nrepo = os.environ.get(\"GITHUB_REPOSITORY\", \"\")\nforced_program = os.environ.get(\"AUTOLOOP_PROGRAM\", \"\").strip()\n\n# Repo-memory files are cloned to /tmp/gh-aw/repo-memory/{id}/ where {id}\n# is derived from the branch-name configured in the tools section (memory/autoloop β†’ autoloop)\nrepo_memory_dir = \"/tmp/gh-aw/repo-memory/autoloop\"\n\ndef parse_machine_state(content):\n \"\"\"Parse the βš™οΈ Machine State table from a state file. Returns a dict.\"\"\"\n state = {}\n m = re.search(r'## βš™οΈ Machine State.*?\\n(.*?)(?=\\n## |\\Z)', content, re.DOTALL)\n if not m:\n return state\n section = m.group(0)\n for row in re.finditer(r'\\|\\s*(.+?)\\s*\\|\\s*(.+?)\\s*\\|', section):\n raw_key = row.group(1).strip()\n raw_val = row.group(2).strip()\n if raw_key.lower() in (\"field\", \"---\", \":---\", \":---:\", \"---:\"):\n continue\n key = raw_key.lower().replace(\" \", \"_\")\n val = None if raw_val in (\"β€”\", \"-\", \"\") else raw_val\n state[key] = val\n # Coerce types\n for int_field in (\"iteration_count\", \"consecutive_errors\"):\n if int_field in state:\n try:\n state[int_field] = int(state[int_field])\n except (ValueError, TypeError):\n state[int_field] = 0\n if \"paused\" in state:\n state[\"paused\"] = str(state.get(\"paused\", \"\")).lower() == \"true\"\n if \"completed\" in state:\n state[\"completed\"] = str(state.get(\"completed\", \"\")).lower() == \"true\"\n # recent_statuses: stored as comma-separated words (e.g. 
\"accepted, rejected, error\")\n rs_raw = state.get(\"recent_statuses\") or \"\"\n if rs_raw:\n state[\"recent_statuses\"] = [s.strip().lower() for s in rs_raw.split(\",\") if s.strip()]\n else:\n state[\"recent_statuses\"] = []\n return state\n\ndef read_program_state(program_name):\n \"\"\"Read scheduling state from the repo-memory state file.\"\"\"\n state_file = os.path.join(repo_memory_dir, f\"{program_name}.md\")\n if not os.path.isfile(state_file):\n print(f\" {program_name}: no state file found (first run)\")\n return {}\n with open(state_file, encoding=\"utf-8\") as f:\n content = f.read()\n return parse_machine_state(content)\n\n# Bootstrap: create autoloop programs directory and template if missing\nif not os.path.isdir(autoloop_dir):\n os.makedirs(autoloop_dir, exist_ok=True)\n bt = chr(96) # backtick β€” avoid literal backticks that break gh-aw compiler\n template = \"\\n\".join([\n \"\",\n \"\",\n \"\",\n \"\",\n \"# Autoloop Program\",\n \"\",\n \"\",\n \"\",\n \"## Goal\",\n \"\",\n \"\",\n \"\",\n \"REPLACE THIS with your optimization goal.\",\n \"\",\n \"## Target\",\n \"\",\n \"\",\n \"\",\n \"Only modify these files:\",\n f\"- {bt}REPLACE_WITH_FILE{bt} -- (describe what this file does)\",\n \"\",\n \"Do NOT modify:\",\n \"- (list files that must not be touched)\",\n \"\",\n \"## Evaluation\",\n \"\",\n \"\",\n \"\",\n f\"{bt}{bt}{bt}bash\",\n \"REPLACE_WITH_YOUR_EVALUATION_COMMAND\",\n f\"{bt}{bt}{bt}\",\n \"\",\n f\"The metric is {bt}REPLACE_WITH_METRIC_NAME{bt}. **Lower/Higher is better.** (pick one)\",\n \"\",\n ])\n with open(template_file, \"w\") as f:\n f.write(template)\n # Leave the template unstaged β€” the agent will create a draft PR with it\n print(f\"BOOTSTRAPPED: created {template_file} locally (agent will create a draft PR)\")\n\n# Find all program files from all locations:\n# 1. Directory-based programs: .autoloop/programs//program.md (preferred)\n# 2. Bare markdown programs: .autoloop/programs/.md (simple)\n# 3. 
Issue-based programs: GitHub issues with the 'autoloop-program' label\nprogram_files = []\nissue_programs = {} # name -> {issue_number, file}\n\n# Scan .autoloop/programs/ for directory-based programs\nif os.path.isdir(programs_dir):\n for entry in sorted(os.listdir(programs_dir)):\n prog_dir = os.path.join(programs_dir, entry)\n if os.path.isdir(prog_dir):\n # Look for program.md inside the directory\n prog_file = os.path.join(prog_dir, \"program.md\")\n if os.path.isfile(prog_file):\n program_files.append(prog_file)\n\n# Scan .autoloop/programs/ for bare markdown programs\nbare_programs = sorted(glob.glob(os.path.join(autoloop_dir, \"*.md\")))\nfor pf in bare_programs:\n program_files.append(pf)\n\n# Scan GitHub issues with the 'autoloop-program' label\nissue_programs_dir = \"/tmp/gh-aw/issue-programs\"\nos.makedirs(issue_programs_dir, exist_ok=True)\ntry:\n api_url = f\"https://api.github.com/repos/{repo}/issues?labels=autoloop-program&state=open&per_page=100\"\n req = urllib.request.Request(api_url, headers={\n \"Authorization\": f\"token {github_token}\",\n \"Accept\": \"application/vnd.github.v3+json\",\n })\n with urllib.request.urlopen(req, timeout=30) as resp:\n issues = json.loads(resp.read().decode())\n for issue in issues:\n if issue.get(\"pull_request\"):\n continue # skip PRs\n body = issue.get(\"body\") or \"\"\n title = issue.get(\"title\") or \"\"\n number = issue[\"number\"]\n # Derive program name from issue title: slugify to lowercase with hyphens\n slug = re.sub(r'[^a-z0-9]+', '-', title.lower()).strip('-')\n slug = re.sub(r'-+', '-', slug) # collapse consecutive hyphens\n if not slug:\n slug = f\"issue-{number}\"\n # Avoid slug collisions: if another issue already claimed this slug, append issue number\n if slug in issue_programs:\n print(f\" Warning: slug '{slug}' (issue #{number}) collides with issue #{issue_programs[slug]['issue_number']}, appending issue number\")\n slug = f\"{slug}-{number}\"\n # Write issue body to a temp file so the 
scheduling loop can process it\n issue_file = os.path.join(issue_programs_dir, f\"{slug}.md\")\n with open(issue_file, \"w\") as f:\n f.write(body)\n program_files.append(issue_file)\n issue_programs[slug] = {\"issue_number\": number, \"file\": issue_file, \"title\": title}\n print(f\" Found issue-based program: '{slug}' (issue #{number})\")\nexcept Exception as e:\n print(f\" Warning: could not fetch issue-based programs: {e}\")\n\nif not program_files:\n # Fallback to single-file locations\n for path in [\".autoloop/program.md\", \"program.md\"]:\n if os.path.isfile(path):\n program_files = [path]\n break\n\nif not program_files:\n print(\"NO_PROGRAMS_FOUND\")\n os.makedirs(\"/tmp/gh-aw\", exist_ok=True)\n with open(\"/tmp/gh-aw/autoloop.json\", \"w\") as f:\n json.dump({\"due\": [], \"skipped\": [], \"unconfigured\": [], \"no_programs\": True}, f)\n sys.exit(0)\n\nos.makedirs(\"/tmp/gh-aw\", exist_ok=True)\nnow = datetime.now(timezone.utc)\ndue = []\nskipped = []\nunconfigured = []\nall_programs = {} # name -> file path (populated during scanning)\n\n# Schedule string to timedelta\ndef parse_schedule(s):\n s = s.strip().lower()\n m = re.match(r\"every\\s+(\\d+)\\s*h\", s)\n if m:\n return timedelta(hours=int(m.group(1)))\n m = re.match(r\"every\\s+(\\d+)\\s*m\", s)\n if m:\n return timedelta(minutes=int(m.group(1)))\n if s == \"daily\":\n return timedelta(hours=24)\n if s == \"weekly\":\n return timedelta(days=7)\n return None # No per-program schedule β€” always due\n\ndef get_program_name(pf):\n \"\"\"Extract program name from file path.\n Directory-based: .autoloop/programs//program.md -> \n Bare markdown: .autoloop/programs/.md -> \n Issue-based: /tmp/gh-aw/issue-programs/.md -> \n \"\"\"\n if pf.endswith(\"/program.md\"):\n # Directory-based program: name is the parent directory\n return os.path.basename(os.path.dirname(pf))\n else:\n # Bare markdown or issue-based program: name is the filename without .md\n return 
os.path.splitext(os.path.basename(pf))[0]\n\nfor pf in program_files:\n name = get_program_name(pf)\n all_programs[name] = pf\n with open(pf) as f:\n content = f.read()\n\n # Check sentinel (skip for issue-based programs which use AUTOLOOP:ISSUE-PROGRAM)\n if \"\" in content:\n unconfigured.append(name)\n continue\n\n # Check for TODO/REPLACE placeholders\n if re.search(r'\\bTODO\\b|\\bREPLACE', content):\n unconfigured.append(name)\n continue\n\n # Parse optional YAML frontmatter for schedule and target-metric\n # Strip leading HTML comments before checking (issue-based programs may have them)\n content_stripped = re.sub(r'^(\\s*\\s*\\n)*', '', content, flags=re.DOTALL)\n schedule_delta = None\n target_metric = None\n fm_match = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n\", content_stripped, re.DOTALL)\n if fm_match:\n for line in fm_match.group(1).split(\"\\n\"):\n if line.strip().startswith(\"schedule:\"):\n schedule_str = line.split(\":\", 1)[1].strip()\n schedule_delta = parse_schedule(schedule_str)\n if line.strip().startswith(\"target-metric:\"):\n try:\n target_metric = float(line.split(\":\", 1)[1].strip())\n except (ValueError, TypeError):\n print(f\" Warning: {name} has invalid target-metric value: {line.split(':', 1)[1].strip()}\")\n\n # Read state from repo-memory\n state = read_program_state(name)\n if state:\n print(f\" {name}: last_run={state.get('last_run')}, iteration_count={state.get('iteration_count')}\")\n else:\n print(f\" {name}: no state found (first run)\")\n\n last_run = None\n lr = state.get(\"last_run\")\n if lr:\n try:\n last_run = datetime.fromisoformat(lr.replace(\"Z\", \"+00:00\"))\n except ValueError:\n pass\n\n # Check if completed (target metric was reached)\n if str(state.get(\"completed\", \"\")).lower() == \"true\":\n skipped.append({\"name\": name, \"reason\": f\"completed: target metric reached\"})\n continue\n\n # Check if paused (e.g., plateau or recurring errors)\n if state.get(\"paused\"):\n skipped.append({\"name\": name, 
\"reason\": f\"paused: {state.get('pause_reason', 'unknown')}\"})\n continue\n\n # Auto-pause on plateau: 5+ consecutive rejections\n recent = state.get(\"recent_statuses\", [])[-5:]\n if len(recent) >= 5 and all(s == \"rejected\" for s in recent):\n skipped.append({\"name\": name, \"reason\": \"plateau: 5 consecutive rejections\"})\n continue\n\n # Check if due based on per-program schedule\n if schedule_delta and last_run:\n if now - last_run < schedule_delta:\n skipped.append({\"name\": name, \"reason\": \"not due yet\",\n \"next_due\": (last_run + schedule_delta).isoformat()})\n continue\n\n due.append({\"name\": name, \"last_run\": lr, \"file\": pf, \"target_metric\": target_metric})\n\n# Pick the program to run\nselected = None\nselected_file = None\nselected_issue = None\nselected_target_metric = None\ndeferred = []\n\nif forced_program:\n # Manual dispatch requested a specific program β€” bypass scheduling\n # (paused, not-due, and plateau programs can still be forced)\n if forced_program not in all_programs:\n print(f\"ERROR: requested program '{forced_program}' not found.\")\n print(f\" Available programs: {list(all_programs.keys())}\")\n sys.exit(1)\n if forced_program in unconfigured:\n print(f\"ERROR: requested program '{forced_program}' is unconfigured (has placeholders).\")\n sys.exit(1)\n selected = forced_program\n selected_file = all_programs[forced_program]\n deferred = [p[\"name\"] for p in due if p[\"name\"] != forced_program]\n if selected in issue_programs:\n selected_issue = issue_programs[selected][\"issue_number\"]\n # Find target_metric: check the due list first, then parse from the program file\n for p in due:\n if p[\"name\"] == forced_program:\n selected_target_metric = p.get(\"target_metric\")\n break\n if selected_target_metric is None:\n # Program may have been skipped (completed/paused/plateau) β€” parse directly\n try:\n with open(selected_file) as _f:\n _content = _f.read()\n _content_stripped = re.sub(r'^(\\s*\\s*\\n)*', '', 
_content, flags=re.DOTALL)\n _fm = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n\", _content_stripped, re.DOTALL)\n if _fm:\n for _line in _fm.group(1).split(\"\\n\"):\n if _line.strip().startswith(\"target-metric:\"):\n selected_target_metric = float(_line.split(\":\", 1)[1].strip())\n break\n except (OSError, ValueError, TypeError):\n pass\n print(f\"FORCED: running program '{forced_program}' (manual dispatch)\")\nelif due:\n # Normal scheduling: pick the single most-overdue program\n due.sort(key=lambda p: p[\"last_run\"] or \"\") # None/empty sorts first (never run)\n selected = due[0][\"name\"]\n selected_file = due[0][\"file\"]\n selected_target_metric = due[0].get(\"target_metric\")\n deferred = [p[\"name\"] for p in due[1:]]\n # Check if the selected program is issue-based\n if selected in issue_programs:\n selected_issue = issue_programs[selected][\"issue_number\"]\n\nresult = {\n \"selected\": selected,\n \"selected_file\": selected_file,\n \"selected_issue\": selected_issue,\n \"selected_target_metric\": selected_target_metric,\n \"issue_programs\": {name: info[\"issue_number\"] for name, info in issue_programs.items()},\n \"deferred\": deferred,\n \"skipped\": skipped,\n \"unconfigured\": unconfigured,\n \"no_programs\": False,\n}\n\nos.makedirs(\"/tmp/gh-aw\", exist_ok=True)\nwith open(\"/tmp/gh-aw/autoloop.json\", \"w\") as f:\n json.dump(result, f, indent=2)\n\nprint(\"=== Autoloop Program Check ===\")\nprint(f\"Selected program: {selected or '(none)'} ({selected_file or 'n/a'})\")\nprint(f\"Deferred (next run): {deferred or '(none)'}\")\nprint(f\"Programs skipped: {[s['name'] for s in skipped] or '(none)'}\")\nprint(f\"Programs unconfigured: {unconfigured or '(none)'}\")\n\nif not selected and not unconfigured:\n print(\"\\nNo programs due this run. 
Exiting early.\")\n sys.exit(1) # Non-zero exit skips the agent step\nPYEOF\n" + run: python3 .github/workflows/scripts/autoloop_scheduler.py # Repo memory git-based storage configuration from frontmatter processed below - name: Clone repo-memory branch (default) @@ -488,12 +492,12 @@ jobs: mkdir -p ${RUNNER_TEMP}/gh-aw/safeoutputs mkdir -p /tmp/gh-aw/safeoutputs mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs - cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_b0478df8124c275d_EOF' + cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_78a2a2ed2efbd98a_EOF' {"add_comment":{"hide_older_comments":false,"max":7,"target":"*"},"add_labels":{"max":2,"target":"*"},"create_issue":{"labels":["automation","autoloop"],"max":1,"title_prefix":"[Autoloop] "},"create_pull_request":{"draft":true,"labels":["automation","autoloop"],"max":2,"max_patch_size":1024,"protected_files":["package.json","bun.lockb","bunfig.toml","deno.json","deno.jsonc","deno.lock","global.json","NuGet.Config","Directory.Packages.props","mix.exs","mix.lock","go.mod","go.sum","stack.yaml","stack.yaml.lock","pom.xml","build.gradle","build.gradle.kts","settings.gradle","settings.gradle.kts","gradle.properties","package-lock.json","yarn.lock","pnpm-lock.yaml","npm-shrinkwrap.json","requirements.txt","Pipfile","Pipfile.lock","pyproject.toml","setup.py","setup.cfg","Gemfile","Gemfile.lock","uv.lock","CODEOWNERS"],"protected_files_policy":"fallback-to-issue","protected_path_prefixes":[".github/",".agents/"],"title_prefix":"[Autoloop] 
"},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"true"},"push_repo_memory":{"memories":[{"dir":"/tmp/gh-aw/repo-memory/default","id":"default","max_file_count":100,"max_file_size":10240,"max_patch_size":10240}]},"push_to_pull_request_branch":{"if_no_changes":"warn","max":2,"max_patch_size":1024,"protected_files":["package.json","bun.lockb","bunfig.toml","deno.json","deno.jsonc","deno.lock","global.json","NuGet.Config","Directory.Packages.props","mix.exs","mix.lock","go.mod","go.sum","stack.yaml","stack.yaml.lock","pom.xml","build.gradle","build.gradle.kts","settings.gradle","settings.gradle.kts","gradle.properties","package-lock.json","yarn.lock","pnpm-lock.yaml","npm-shrinkwrap.json","requirements.txt","Pipfile","Pipfile.lock","pyproject.toml","setup.py","setup.cfg","Gemfile","Gemfile.lock","uv.lock","CODEOWNERS"],"protected_path_prefixes":[".github/",".agents/"],"target":"*","title_prefix":"[Autoloop] "},"remove_labels":{"max":2,"target":"*"},"update_issue":{"allow_body":true,"max":3,"target":"*","title_prefix":"[Autoloop] "}} - GH_AW_SAFE_OUTPUTS_CONFIG_b0478df8124c275d_EOF + GH_AW_SAFE_OUTPUTS_CONFIG_78a2a2ed2efbd98a_EOF - name: Write Safe Outputs Tools run: | - cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/tools_meta.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_META_e083150970ed99c8_EOF' + cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/tools_meta.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_META_26e30063dd6b7824_EOF' { "description_suffixes": { "add_comment": " CONSTRAINTS: Maximum 7 comment(s) can be added. 
Target: *.", @@ -507,8 +511,8 @@ jobs: "repo_params": {}, "dynamic_tools": [] } - GH_AW_SAFE_OUTPUTS_TOOLS_META_e083150970ed99c8_EOF - cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_5352c6397ffb4603_EOF' + GH_AW_SAFE_OUTPUTS_TOOLS_META_26e30063dd6b7824_EOF + cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_53d20ba1aa3186df_EOF' { "add_comment": { "defaultMax": 1, @@ -767,7 +771,7 @@ jobs: "customValidation": "requiresOneOf:status,title,body" } } - GH_AW_SAFE_OUTPUTS_VALIDATION_5352c6397ffb4603_EOF + GH_AW_SAFE_OUTPUTS_VALIDATION_53d20ba1aa3186df_EOF node ${RUNNER_TEMP}/gh-aw/actions/generate_safe_outputs_tools.cjs - name: Generate Safe Outputs MCP Server Config id: safe-outputs-config @@ -837,7 +841,7 @@ jobs: export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v 
'"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.11' mkdir -p /home/runner/.copilot - cat << GH_AW_MCP_CONFIG_5d2d4ce1ccd1824d_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh + cat << GH_AW_MCP_CONFIG_e86cff10886b39f3_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh { "mcpServers": { "github": { @@ -878,7 +882,7 @@ jobs: "payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}" } } - GH_AW_MCP_CONFIG_5d2d4ce1ccd1824d_EOF + GH_AW_MCP_CONFIG_e86cff10886b39f3_EOF - name: Download activation artifact uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: diff --git a/.github/workflows/autoloop.md b/.github/workflows/autoloop.md index e564101..ba33d21 100644 --- a/.github/workflows/autoloop.md +++ b/.github/workflows/autoloop.md @@ -87,374 +87,7 @@ steps: GITHUB_REPOSITORY: ${{ github.repository }} AUTOLOOP_PROGRAM: ${{ github.event.inputs.program }} run: | - python3 - << 'PYEOF' - import os, json, re, glob, sys - import urllib.request, urllib.error - from datetime import datetime, timezone, timedelta - - programs_dir = ".autoloop/programs" - autoloop_dir = ".autoloop/programs" - template_file = os.path.join(autoloop_dir, "example.md") - - # Read program state from repo-memory (persistent git-backed storage) - github_token = os.environ.get("GITHUB_TOKEN", "") - repo = os.environ.get("GITHUB_REPOSITORY", "") - forced_program = os.environ.get("AUTOLOOP_PROGRAM", "").strip() - - # Repo-memory files are cloned to /tmp/gh-aw/repo-memory/{id}/ where {id} - # is derived from the branch-name configured in the tools section (memory/autoloop β†’ autoloop) - repo_memory_dir = "/tmp/gh-aw/repo-memory/autoloop" - - def parse_machine_state(content): - """Parse the βš™οΈ Machine State table from a state file. 
Returns a dict.""" - state = {} - m = re.search(r'## βš™οΈ Machine State.*?\n(.*?)(?=\n## |\Z)', content, re.DOTALL) - if not m: - return state - section = m.group(0) - for row in re.finditer(r'\|\s*(.+?)\s*\|\s*(.+?)\s*\|', section): - raw_key = row.group(1).strip() - raw_val = row.group(2).strip() - if raw_key.lower() in ("field", "---", ":---", ":---:", "---:"): - continue - key = raw_key.lower().replace(" ", "_") - val = None if raw_val in ("β€”", "-", "") else raw_val - state[key] = val - # Coerce types - for int_field in ("iteration_count", "consecutive_errors"): - if int_field in state: - try: - state[int_field] = int(state[int_field]) - except (ValueError, TypeError): - state[int_field] = 0 - if "paused" in state: - state["paused"] = str(state.get("paused", "")).lower() == "true" - if "completed" in state: - state["completed"] = str(state.get("completed", "")).lower() == "true" - # recent_statuses: stored as comma-separated words (e.g. "accepted, rejected, error") - rs_raw = state.get("recent_statuses") or "" - if rs_raw: - state["recent_statuses"] = [s.strip().lower() for s in rs_raw.split(",") if s.strip()] - else: - state["recent_statuses"] = [] - return state - - def read_program_state(program_name): - """Read scheduling state from the repo-memory state file.""" - state_file = os.path.join(repo_memory_dir, f"{program_name}.md") - if not os.path.isfile(state_file): - print(f" {program_name}: no state file found (first run)") - return {} - with open(state_file, encoding="utf-8") as f: - content = f.read() - return parse_machine_state(content) - - # Bootstrap: create autoloop programs directory and template if missing - if not os.path.isdir(autoloop_dir): - os.makedirs(autoloop_dir, exist_ok=True) - bt = chr(96) # backtick β€” avoid literal backticks that break gh-aw compiler - template = "\n".join([ - "", - "", - "", - "", - "# Autoloop Program", - "", - "", - "", - "## Goal", - "", - "", - "", - "REPLACE THIS with your optimization goal.", - "", - "## 
Target", - "", - "", - "", - "Only modify these files:", - f"- {bt}REPLACE_WITH_FILE{bt} -- (describe what this file does)", - "", - "Do NOT modify:", - "- (list files that must not be touched)", - "", - "## Evaluation", - "", - "", - "", - f"{bt}{bt}{bt}bash", - "REPLACE_WITH_YOUR_EVALUATION_COMMAND", - f"{bt}{bt}{bt}", - "", - f"The metric is {bt}REPLACE_WITH_METRIC_NAME{bt}. **Lower/Higher is better.** (pick one)", - "", - ]) - with open(template_file, "w") as f: - f.write(template) - # Leave the template unstaged β€” the agent will create a draft PR with it - print(f"BOOTSTRAPPED: created {template_file} locally (agent will create a draft PR)") - - # Find all program files from all locations: - # 1. Directory-based programs: .autoloop/programs//program.md (preferred) - # 2. Bare markdown programs: .autoloop/programs/.md (simple) - # 3. Issue-based programs: GitHub issues with the 'autoloop-program' label - program_files = [] - issue_programs = {} # name -> {issue_number, file} - - # Scan .autoloop/programs/ for directory-based programs - if os.path.isdir(programs_dir): - for entry in sorted(os.listdir(programs_dir)): - prog_dir = os.path.join(programs_dir, entry) - if os.path.isdir(prog_dir): - # Look for program.md inside the directory - prog_file = os.path.join(prog_dir, "program.md") - if os.path.isfile(prog_file): - program_files.append(prog_file) - - # Scan .autoloop/programs/ for bare markdown programs - bare_programs = sorted(glob.glob(os.path.join(autoloop_dir, "*.md"))) - for pf in bare_programs: - program_files.append(pf) - - # Scan GitHub issues with the 'autoloop-program' label - issue_programs_dir = "/tmp/gh-aw/issue-programs" - os.makedirs(issue_programs_dir, exist_ok=True) - try: - api_url = f"https://api.github.com/repos/{repo}/issues?labels=autoloop-program&state=open&per_page=100" - req = urllib.request.Request(api_url, headers={ - "Authorization": f"token {github_token}", - "Accept": "application/vnd.github.v3+json", - }) - with 
urllib.request.urlopen(req, timeout=30) as resp: - issues = json.loads(resp.read().decode()) - for issue in issues: - if issue.get("pull_request"): - continue # skip PRs - body = issue.get("body") or "" - title = issue.get("title") or "" - number = issue["number"] - # Derive program name from issue title: slugify to lowercase with hyphens - slug = re.sub(r'[^a-z0-9]+', '-', title.lower()).strip('-') - slug = re.sub(r'-+', '-', slug) # collapse consecutive hyphens - if not slug: - slug = f"issue-{number}" - # Avoid slug collisions: if another issue already claimed this slug, append issue number - if slug in issue_programs: - print(f" Warning: slug '{slug}' (issue #{number}) collides with issue #{issue_programs[slug]['issue_number']}, appending issue number") - slug = f"{slug}-{number}" - # Write issue body to a temp file so the scheduling loop can process it - issue_file = os.path.join(issue_programs_dir, f"{slug}.md") - with open(issue_file, "w") as f: - f.write(body) - program_files.append(issue_file) - issue_programs[slug] = {"issue_number": number, "file": issue_file, "title": title} - print(f" Found issue-based program: '{slug}' (issue #{number})") - except Exception as e: - print(f" Warning: could not fetch issue-based programs: {e}") - - if not program_files: - # Fallback to single-file locations - for path in [".autoloop/program.md", "program.md"]: - if os.path.isfile(path): - program_files = [path] - break - - if not program_files: - print("NO_PROGRAMS_FOUND") - os.makedirs("/tmp/gh-aw", exist_ok=True) - with open("/tmp/gh-aw/autoloop.json", "w") as f: - json.dump({"due": [], "skipped": [], "unconfigured": [], "no_programs": True}, f) - sys.exit(0) - - os.makedirs("/tmp/gh-aw", exist_ok=True) - now = datetime.now(timezone.utc) - due = [] - skipped = [] - unconfigured = [] - all_programs = {} # name -> file path (populated during scanning) - - # Schedule string to timedelta - def parse_schedule(s): - s = s.strip().lower() - m = re.match(r"every\s+(\d+)\s*h", 
s) - if m: - return timedelta(hours=int(m.group(1))) - m = re.match(r"every\s+(\d+)\s*m", s) - if m: - return timedelta(minutes=int(m.group(1))) - if s == "daily": - return timedelta(hours=24) - if s == "weekly": - return timedelta(days=7) - return None # No per-program schedule β€” always due - - def get_program_name(pf): - """Extract program name from file path. - Directory-based: .autoloop/programs//program.md -> - Bare markdown: .autoloop/programs/.md -> - Issue-based: /tmp/gh-aw/issue-programs/.md -> - """ - if pf.endswith("/program.md"): - # Directory-based program: name is the parent directory - return os.path.basename(os.path.dirname(pf)) - else: - # Bare markdown or issue-based program: name is the filename without .md - return os.path.splitext(os.path.basename(pf))[0] - - for pf in program_files: - name = get_program_name(pf) - all_programs[name] = pf - with open(pf) as f: - content = f.read() - - # Check sentinel (skip for issue-based programs which use AUTOLOOP:ISSUE-PROGRAM) - if "" in content: - unconfigured.append(name) - continue - - # Check for TODO/REPLACE placeholders - if re.search(r'\bTODO\b|\bREPLACE', content): - unconfigured.append(name) - continue - - # Parse optional YAML frontmatter for schedule and target-metric - # Strip leading HTML comments before checking (issue-based programs may have them) - content_stripped = re.sub(r'^(\s*\s*\n)*', '', content, flags=re.DOTALL) - schedule_delta = None - target_metric = None - fm_match = re.match(r"^---\s*\n(.*?)\n---\s*\n", content_stripped, re.DOTALL) - if fm_match: - for line in fm_match.group(1).split("\n"): - if line.strip().startswith("schedule:"): - schedule_str = line.split(":", 1)[1].strip() - schedule_delta = parse_schedule(schedule_str) - if line.strip().startswith("target-metric:"): - try: - target_metric = float(line.split(":", 1)[1].strip()) - except (ValueError, TypeError): - print(f" Warning: {name} has invalid target-metric value: {line.split(':', 1)[1].strip()}") - - # Read state 
from repo-memory - state = read_program_state(name) - if state: - print(f" {name}: last_run={state.get('last_run')}, iteration_count={state.get('iteration_count')}") - else: - print(f" {name}: no state found (first run)") - - last_run = None - lr = state.get("last_run") - if lr: - try: - last_run = datetime.fromisoformat(lr.replace("Z", "+00:00")) - except ValueError: - pass - - # Check if completed (target metric was reached) - if str(state.get("completed", "")).lower() == "true": - skipped.append({"name": name, "reason": f"completed: target metric reached"}) - continue - - # Check if paused (e.g., plateau or recurring errors) - if state.get("paused"): - skipped.append({"name": name, "reason": f"paused: {state.get('pause_reason', 'unknown')}"}) - continue - - # Auto-pause on plateau: 5+ consecutive rejections - recent = state.get("recent_statuses", [])[-5:] - if len(recent) >= 5 and all(s == "rejected" for s in recent): - skipped.append({"name": name, "reason": "plateau: 5 consecutive rejections"}) - continue - - # Check if due based on per-program schedule - if schedule_delta and last_run: - if now - last_run < schedule_delta: - skipped.append({"name": name, "reason": "not due yet", - "next_due": (last_run + schedule_delta).isoformat()}) - continue - - due.append({"name": name, "last_run": lr, "file": pf, "target_metric": target_metric}) - - # Pick the program to run - selected = None - selected_file = None - selected_issue = None - selected_target_metric = None - deferred = [] - - if forced_program: - # Manual dispatch requested a specific program β€” bypass scheduling - # (paused, not-due, and plateau programs can still be forced) - if forced_program not in all_programs: - print(f"ERROR: requested program '{forced_program}' not found.") - print(f" Available programs: {list(all_programs.keys())}") - sys.exit(1) - if forced_program in unconfigured: - print(f"ERROR: requested program '{forced_program}' is unconfigured (has placeholders).") - sys.exit(1) - selected 
= forced_program - selected_file = all_programs[forced_program] - deferred = [p["name"] for p in due if p["name"] != forced_program] - if selected in issue_programs: - selected_issue = issue_programs[selected]["issue_number"] - # Find target_metric: check the due list first, then parse from the program file - for p in due: - if p["name"] == forced_program: - selected_target_metric = p.get("target_metric") - break - if selected_target_metric is None: - # Program may have been skipped (completed/paused/plateau) β€” parse directly - try: - with open(selected_file) as _f: - _content = _f.read() - _content_stripped = re.sub(r'^(\s*\s*\n)*', '', _content, flags=re.DOTALL) - _fm = re.match(r"^---\s*\n(.*?)\n---\s*\n", _content_stripped, re.DOTALL) - if _fm: - for _line in _fm.group(1).split("\n"): - if _line.strip().startswith("target-metric:"): - selected_target_metric = float(_line.split(":", 1)[1].strip()) - break - except (OSError, ValueError, TypeError): - pass - print(f"FORCED: running program '{forced_program}' (manual dispatch)") - elif due: - # Normal scheduling: pick the single most-overdue program - due.sort(key=lambda p: p["last_run"] or "") # None/empty sorts first (never run) - selected = due[0]["name"] - selected_file = due[0]["file"] - selected_target_metric = due[0].get("target_metric") - deferred = [p["name"] for p in due[1:]] - # Check if the selected program is issue-based - if selected in issue_programs: - selected_issue = issue_programs[selected]["issue_number"] - - result = { - "selected": selected, - "selected_file": selected_file, - "selected_issue": selected_issue, - "selected_target_metric": selected_target_metric, - "issue_programs": {name: info["issue_number"] for name, info in issue_programs.items()}, - "deferred": deferred, - "skipped": skipped, - "unconfigured": unconfigured, - "no_programs": False, - } - - os.makedirs("/tmp/gh-aw", exist_ok=True) - with open("/tmp/gh-aw/autoloop.json", "w") as f: - json.dump(result, f, indent=2) - - 
print("=== Autoloop Program Check ===") - print(f"Selected program: {selected or '(none)'} ({selected_file or 'n/a'})") - print(f"Deferred (next run): {deferred or '(none)'}") - print(f"Programs skipped: {[s['name'] for s in skipped] or '(none)'}") - print(f"Programs unconfigured: {unconfigured or '(none)'}") - - if not selected and not unconfigured: - print("\nNo programs due this run. Exiting early.") - sys.exit(1) # Non-zero exit skips the agent step - PYEOF + python3 .github/workflows/scripts/autoloop_scheduler.py source: githubnext/autoloop engine: copilot @@ -538,6 +171,7 @@ The pre-step has already determined which program to run. Read `/tmp/gh-aw/autol - **`unconfigured`**: Programs that still have the sentinel or placeholder content. - **`skipped`**: Programs not due yet based on their per-program schedule. - **`no_programs`**: If `true`, no program files exist at all. +- **`not_due`**: If `true`, programs exist but none are due for this run. If `selected` is not null: 1. Read the program file from the `selected_file` path. 
@@ -569,7 +203,7 @@ GitHub Issues (labeled 'autoloop-program'): Each program runs independently with its own: - Goal, target files, and evaluation command - Metric tracking and best-metric history -- Steering issue: `[Autoloop: {program-name}] Steering` (persistent, links branch/PR/state) +- Program issue: `[Autoloop: {program-name}]` (a single GitHub issue labeled `autoloop-program` — created automatically for file-based programs, the source issue for issue-based programs — that hosts the status comment, per-iteration comments, and human steering) - Long-running branch: `autoloop/{program-name}` (persists across iterations) - Single draft PR per program: `[Autoloop: {program-name}]` (accumulates all accepted iterations) - State file: `{program-name}.md` in repo-memory (all state: scheduling, research context, iteration history) @@ -655,10 +289,10 @@ Examples: Each program has three coordinated resources: - **Branch + PR**: `autoloop/{program-name}` with a single draft PR -- **Steering Issue**: `[Autoloop: {program-name}] Steering` — persistent GitHub issue linking branch, PR, and state +- **Program Issue**: `[Autoloop: {program-name}]` — a single GitHub issue (labeled `autoloop-program`) that hosts the status comment, per-iteration comments, and human steering. For issue-based programs this is the source issue. For file-based programs it is auto-created on the first run. - **State File**: `{program-name}.md` in repo-memory — all state, history, and research context -All three reference each other. The steering issue is created on the first accepted iteration and updated with links to the PR and state. +All three reference each other. The program issue is created (or, for issue-based programs, adopted) on the first run and updated with links to the PR and state. ## Iteration Loop @@ -713,15 +347,15 @@ Each run executes **one iteration for the single selected program**: 2. Push the commit to the long-running branch. 3.
If a draft PR does not already exist for this branch, create one: - Title: `[Autoloop: {program-name}]` - - Body includes: a summary of the program goal, link to the steering issue, the current best metric, and AI disclosure: `🤖 *This PR is maintained by Autoloop. Each accepted iteration adds a commit to this branch.*` + - Body includes: a summary of the program goal, link to the program issue, the current best metric, and AI disclosure: `🤖 *This PR is maintained by Autoloop. Each accepted iteration adds a commit to this branch.*` If a draft PR already exists, update the PR body with the latest metric and a summary of the most recent accepted iteration. Add a comment to the PR summarizing the iteration: what changed, old metric, new metric, improvement delta, and a link to the actions run. -4. Ensure the steering issue exists (see [Steering Issue](#steering-issue) below). Add a comment to the steering issue linking to the commit and actions run. +4. Ensure the program issue exists (see [Program Issue](#program-issue) below) — for file-based programs that have no program issue yet (`selected_issue` is null in `/tmp/gh-aw/autoloop.json`), create one and record its number in the state file's `Issue` field. 5. Update the state file `{program-name}.md` in the repo-memory folder: - Update the **⚙️ Machine State** table: reset `consecutive_errors` to 0, set `best_metric`, increment `iteration_count`, set `last_run` to current UTC timestamp, append `"accepted"` to `recent_statuses` (keep last 10), set `paused` to false. - Prepend an entry to **📊 Iteration History** (newest first) with status ✅, metric, PR link, and a one-line summary of what changed and why it worked. - Update **📚 Lessons Learned** if this iteration revealed something new about the problem or what works. - Update **🔭 Future Directions** if this iteration opened new promising paths. -6.
**If this is an issue-based program** (`selected_issue` is not null): update the status comment and post a per-run comment on the source issue (see [Issue-Based Program Updates](#issue-based-program-updates)). +6. **Update the program issue**: edit the status comment and post a per-iteration comment on the program issue (see [Program Issue](#program-issue)). 7. **Check halting condition** (see [Halting Condition](#halting-condition)): If the program has a `target-metric` in its frontmatter and the new `best_metric` meets or surpasses the target, mark the program as completed. **If the metric did not improve**: @@ -731,7 +365,7 @@ Each run executes **one iteration for the single selected program**: - Prepend an entry to **📊 Iteration History** with status ❌, metric, and a one-line summary of what was tried. - If this approach is conclusively ruled out (e.g., tried multiple variations and all fail), add it to **🚧 Foreclosed Avenues** with a clear explanation. - Update **🔭 Future Directions** if this rejection clarified what to try next. -3. **If this is an issue-based program** (`selected_issue` is not null): update the status comment and post a per-run comment on the source issue (see [Issue-Based Program Updates](#issue-based-program-updates)). +3. **Update the program issue**: edit the status comment and post a per-iteration comment on the program issue (see [Program Issue](#program-issue)). **If evaluation could not run** (build failure, missing dependencies, etc.): 1. Discard the code changes (do not commit them to the long-running branch). @@ -739,50 +373,33 @@ Each run executes **one iteration for the single selected program**: - Update the **⚙️ Machine State** table: increment `consecutive_errors`, increment `iteration_count`, set `last_run`, append `"error"` to `recent_statuses` (keep last 10). - If `consecutive_errors` reaches 3+, set `paused` to `true` and set `pause_reason` in the Machine State table, and create an issue describing the problem.
- Prepend an entry to **📊 Iteration History** with status ⚠️ and a brief error description. -3. **If this is an issue-based program** (`selected_issue` is not null): update the status comment and post a per-run comment on the source issue (see [Issue-Based Program Updates](#issue-based-program-updates)). +3. **Update the program issue**: edit the status comment and post a per-iteration comment on the program issue (see [Program Issue](#program-issue)). -## Steering Issue +## Program Issue -Maintain a single **persistent** open issue per program titled `[Autoloop: {program-name}] Steering`. The steering issue lives for the entire lifetime of the program. +Each program has **exactly one** open GitHub issue (labeled `autoloop-program`) titled `[Autoloop: {program-name}]`. This single issue is the source of truth for the program — it hosts: -The steering issue serves as the central coordination point linking together the program's key resources: -- The **long-running branch** `autoloop/{program-name}` and its draft PR -- The **state file** `{program-name}.md` in repo-memory (on the `memory/autoloop` branch) +- The **status comment** (the earliest bot comment, edited in place each iteration) — a dashboard of current state. +- A **per-iteration comment** for every iteration (accepted, rejected, or error) — the rolling log. +- **Human steering comments** — plain-prose comments from maintainers, treated by the agent as directives. -### Steering Issue Body Format +There are no separate "steering" or "experiment log" issues — they have all been collapsed into this one issue.
-```markdown -🤖 *Autoloop — steering issue for the `{program-name}` program.* - -## Links - -- **Branch**: [`autoloop/{program-name}`](https://github.com/{owner}/{repo}/tree/autoloop/{program-name}) -- **Pull Request**: #{pr_number} -- **State File**: [`{program-name}.md`](https://github.com/{owner}/{repo}/blob/memory/autoloop/{program-name}.md) +### Auto-Creation for File-Based Programs -## Program +If `selected_issue` is `null` in `/tmp/gh-aw/autoloop.json`, the program is file-based **and** has no program issue yet. On the first run, create one with `create-issue`: -**Goal**: {one-line summary from program.md} -**Metric**: {metric-name} ({higher/lower} is better) -**Current best**: {best_metric} -**Iterations**: {iteration_count} -``` +- **Title**: `[Autoloop: {program-name}]` (the `[Autoloop] ` prefix is added automatically by the safe-output `title-prefix`, so pass the title as `{program-name}`). +- **Body**: the contents of the program file (`program.md`) plus a placeholder for the status comment so maintainers know one will be edited in place. +- **Labels**: `[autoloop-program, automation, autoloop]`. -### Steering Issue Rules +Record the new issue number in the state file's `Issue` field. On subsequent runs, the pre-step will discover the existing program issue (it scans open issues with the `autoloop-program` label) and `selected_issue` will be populated automatically. -- Create the steering issue on the **first accepted iteration** for the program if it does not already exist. -- **Update the issue body** whenever the best metric or PR number changes. -- **Add a comment** on each accepted iteration with a link to the commit and actions run. -- The steering issue is labeled `[automation, autoloop]`. -- Do NOT close the steering issue when the PR is merged — the branch continues to accumulate future iterations.
- -## Issue-Based Program Updates - -When a program is defined via a GitHub issue (i.e., `selected_issue` is not null in `/tmp/gh-aw/autoloop.json`), the source issue itself serves as the program definition **and** as the primary interface for steering and monitoring the program. In addition to the normal iteration workflow (state file, steering issue, PR), you must also update the source issue. +For issue-based programs (`selected_issue` is not null on the very first run), no creation is needed — the source issue is already the program issue. The flow below is identical from there on. ### Status Comment -On the **first iteration** for an issue-based program, post a comment on the source issue. On **every subsequent iteration**, update that same comment (edit it, do not post a new one). This is the "status comment" — always the earliest bot comment on the issue. +On the **first iteration**, post a comment on the program issue. On **every subsequent iteration**, update that same comment (edit it, do not post a new one). This is the "status comment" — always the earliest bot comment on the issue. Find the status comment by searching for a comment containing ``. If multiple comments contain this sentinel, use the earliest one (lowest comment ID) and ignore the others.
@@ -802,16 +419,16 @@ Find the status comment by searching for a comment containing `\s*\n)*", "", content, flags=re.DOTALL) + schedule_delta = None + target_metric = None + target_metric_invalid = None + fm_match = re.match(r"^---\s*\n(.*?)\n---\s*\n", content_stripped, re.DOTALL) + if not fm_match: + return schedule_delta, target_metric, target_metric_invalid + for line in fm_match.group(1).split("\n"): + if line.strip().startswith("schedule:"): + schedule_str = line.split(":", 1)[1].strip() + schedule_delta = parse_schedule(schedule_str) + if line.strip().startswith("target-metric:"): + raw = line.split(":", 1)[1].strip() + try: + target_metric = float(raw) + except (ValueError, TypeError): + target_metric_invalid = raw + return schedule_delta, target_metric, target_metric_invalid + + +def is_unconfigured(content): + """Return True if a program file still contains the unconfigured sentinel + or any TODO/REPLACE placeholder.""" + if "" in content: + return True + if re.search(r"\bTODO\b|\bREPLACE", content): + return True + return False + + +def check_skip_conditions(state): + """Return ``(should_skip, reason)`` based on the program state.""" + if str(state.get("completed", "")).lower() == "true" or state.get("completed") is True: + return True, "completed: target metric reached" + if state.get("paused"): + return True, "paused: {}".format(state.get("pause_reason", "unknown")) + recent = state.get("recent_statuses", [])[-5:] + if len(recent) >= 5 and all(s == "rejected" for s in recent): + return True, "plateau: 5 consecutive rejections" + return False, None + + +# --------------------------------------------------------------------------- +# I/O helpers +# --------------------------------------------------------------------------- + + +def read_program_state(program_name, repo_memory_dir=REPO_MEMORY_DIR): + """Read scheduling state from the repo-memory state file (or ``{}``).""" + state_file = os.path.join(repo_memory_dir, "{}.md".format(program_name)) + if not 
os.path.isfile(state_file): + print(" {}: no state file found (first run)".format(program_name)) + return {} + with open(state_file, encoding="utf-8") as f: + content = f.read() + return parse_machine_state(content) + + +def get_state_file_size(program_name, repo_memory_dir=REPO_MEMORY_DIR): + """Return the size of the program's state file in bytes (0 if missing). + + Surfaced in ``autoloop.json`` as ``state_file_size_bytes`` so the agent + can decide whether to compact the state file aggressively this iteration + (see the rolling-compaction rule in ``workflows/autoloop.md``'s + "Update Rules" section). + """ + state_file = os.path.join(repo_memory_dir, "{}.md".format(program_name)) + try: + st = os.stat(state_file) + except OSError: + return 0 + return st.st_size + + +def _bootstrap_template_if_missing(): + """Create ``.autoloop/programs/example.md`` if the directory is missing.""" + if os.path.isdir(PROGRAMS_DIR): + return + os.makedirs(PROGRAMS_DIR, exist_ok=True) + bt = chr(96) # backtick β€” keep gh-aw compiler happy if this ever gets inlined + template = "\n".join([ + "", + "", + "", + "", + "# Autoloop Program", + "", + "", + "", + "## Goal", + "", + "", + "", + "REPLACE THIS with your optimization goal.", + "", + "## Target", + "", + "", + "", + "Only modify these files:", + "- {bt}REPLACE_WITH_FILE{bt} -- (describe what this file does)".format(bt=bt), + "", + "Do NOT modify:", + "- (list files that must not be touched)", + "", + "## Evaluation", + "", + "", + "", + "{bt}{bt}{bt}bash".format(bt=bt), + "REPLACE_WITH_YOUR_EVALUATION_COMMAND", + "{bt}{bt}{bt}".format(bt=bt), + "", + "The metric is {bt}REPLACE_WITH_METRIC_NAME{bt}. 
**Lower/Higher is better.** (pick one)".format(bt=bt), + "", + ]) + with open(TEMPLATE_FILE, "w") as f: + f.write(template) + # Leave the template unstaged β€” the agent will create a draft PR with it + print("BOOTSTRAPPED: created {} locally (agent will create a draft PR)".format(TEMPLATE_FILE)) + + +def _scan_directory_programs(): + """Return paths of directory-based programs under ``PROGRAMS_DIR``.""" + out = [] + if not os.path.isdir(PROGRAMS_DIR): + return out + for entry in sorted(os.listdir(PROGRAMS_DIR)): + prog_dir = os.path.join(PROGRAMS_DIR, entry) + if os.path.isdir(prog_dir): + prog_file = os.path.join(prog_dir, "program.md") + if os.path.isfile(prog_file): + out.append(prog_file) + return out + + +def _scan_bare_programs(): + """Return paths of bare-markdown programs under ``PROGRAMS_DIR``.""" + return sorted(glob.glob(os.path.join(PROGRAMS_DIR, "*.md"))) + + +def _fetch_issue_programs(repo, github_token): + """Fetch open issues with the ``autoloop-program`` label and write their + bodies to ``ISSUE_PROGRAMS_DIR``. Returns ``(program_files, issue_programs)``. + + Errors are swallowed (with a warning) so a transient API failure doesn't + block the run for non-issue-based programs. 
+ """ + program_files = [] + issue_programs = {} + os.makedirs(ISSUE_PROGRAMS_DIR, exist_ok=True) + next_url = ( + "https://api.github.com/repos/{}/issues" + "?labels=autoloop-program&state=open&per_page=100".format(repo) + ) + headers = { + "Authorization": "token {}".format(github_token), + "Accept": "application/vnd.github.v3+json", + } + issues = [] + try: + while next_url: + req = urllib.request.Request(next_url, headers=headers) + with urllib.request.urlopen(req, timeout=30) as resp: + page = json.loads(resp.read().decode()) + link_header = resp.headers.get("link") or resp.headers.get("Link") + issues.extend(page) + next_url = parse_link_header(link_header) + for issue in issues: + if issue.get("pull_request"): + continue # skip PRs + body = issue.get("body") or "" + title = issue.get("title") or "" + number = issue["number"] + slug = slugify_issue_title(title, number) + if slug in issue_programs: + print( + " Warning: slug '{}' (issue #{}) collides with issue #{}, " + "appending issue number".format( + slug, number, issue_programs[slug]["issue_number"] + ) + ) + slug = "{}-{}".format(slug, number) + issue_file = os.path.join(ISSUE_PROGRAMS_DIR, "{}.md".format(slug)) + with open(issue_file, "w") as f: + f.write(body) + program_files.append(issue_file) + issue_programs[slug] = {"issue_number": number, "file": issue_file, "title": title} + print(" Found issue-based program: '{}' (issue #{})".format(slug, number)) + except Exception as e: # noqa: BLE001 -- best-effort; logged below + print(" Warning: could not fetch issue-based programs: {}".format(e)) + return program_files, issue_programs + + +def _parse_target_metric_from_file(path): + """Re-parse a program file to extract its ``target-metric``, if any.""" + try: + with open(path) as f: + _, target_metric, _ = parse_program_frontmatter(f.read()) + return target_metric + except (OSError, ValueError, TypeError): + return None + + +# --------------------------------------------------------------------------- +# 
Existing PR lookup (single-PR-per-program invariant) +# --------------------------------------------------------------------------- + + +def _http_get_json(url, headers, timeout=30): + """Open ``url`` and return ``(parsed_body, link_header)``. + + Returns ``(None, None)`` on any HTTP/network error so callers can fall + through to the next strategy. Broken out into a module-level helper so + tests can monkey-patch it without touching ``urllib`` directly. + """ + try: + req = urllib.request.Request(url, headers=headers) + with urllib.request.urlopen(req, timeout=timeout) as resp: + body = json.loads(resp.read().decode()) + link_header = resp.headers.get("link") or resp.headers.get("Link") + return body, link_header + except (urllib.error.URLError, urllib.error.HTTPError, ValueError, OSError): + return None, None + + +def find_existing_pr_for_branch(repo, program_name, github_token, http_get_json=_http_get_json): + """Look up the open draft PR (if any) for ``autoloop/{program_name}``. + + Returns the PR number, or ``None`` if none is found. + + The single-PR-per-program invariant requires that we never open a second + draft PR for the same program. The agent uses the returned ``existing_pr`` + to decide between ``create-pull-request`` (only if ``None``) and + ``push-to-pull-request-branch`` (always preferred when an open PR exists). + + We also tolerate legacy framework-suffixed branch names of the form + ``autoloop/{program}-<6-40 hex chars>`` so installations upgrading from + before ``preserve-branch-name: true`` was set find their in-flight PR + rather than opening a second one. + """ + if not repo or not program_name or not github_token: + return None + owner = repo.split("/", 1)[0] + canonical_branch = "autoloop/{}".format(program_name) + headers = { + "Authorization": "token {}".format(github_token), + "Accept": "application/vnd.github.v3+json", + } + # Strategy 1: exact canonical branch name via the head= filter. 
+ head_q = urllib.parse.quote("{}:{}".format(owner, canonical_branch), safe="") + url = "https://api.github.com/repos/{}/pulls?head={}&state=open".format(repo, head_q) + body, _ = http_get_json(url, headers) + if isinstance(body, list) and body: + first = body[0] + if isinstance(first, dict) and first.get("number"): + return first["number"] + + # Strategy 2: paginate open PRs and match either a legacy framework-suffixed + # branch (``autoloop/{name}-<6-40 hex>``) or a ``[Autoloop: {name}]`` title prefix. + suffix_regex = re.compile( + r"^autoloop/" + re.escape(program_name) + r"(-[0-9a-f]{6,40})?$" + ) + title_prefix = "[Autoloop: {}]".format(program_name) + next_url = "https://api.github.com/repos/{}/pulls?state=open&per_page=100".format(repo) + while next_url: + body, link_header = http_get_json(next_url, headers) + if not isinstance(body, list): + break + for pr in body: + if not isinstance(pr, dict): + continue + head_ref = "" + head = pr.get("head") or {} + if isinstance(head, dict): + head_ref = head.get("ref") or "" + if suffix_regex.match(head_ref): + return pr.get("number") + title = pr.get("title") + if isinstance(title, str) and title.startswith(title_prefix): + return pr.get("number") + next_url = parse_link_header(link_header) + return None + + +# --------------------------------------------------------------------------- +# Selection +# --------------------------------------------------------------------------- + + +def select_program(due, forced_program=None, all_programs=None, unconfigured=None, issue_programs=None): + """Pick the program to run. + + Returns ``(selected, selected_file, selected_issue, selected_target_metric, + deferred, error)``. ``error`` is a string describing why a forced selection + failed (and the caller should ``sys.exit(1)``); otherwise it is ``None``. 
+ """ + all_programs = all_programs or {} + unconfigured = unconfigured or [] + issue_programs = issue_programs or {} + if forced_program: + if forced_program not in all_programs: + return ( + None, None, None, None, [], + "requested program '{}' not found. Available programs: {}".format( + forced_program, list(all_programs.keys()) + ), + ) + if forced_program in unconfigured: + return ( + None, None, None, None, [], + "requested program '{}' is unconfigured (has placeholders).".format( + forced_program + ), + ) + selected = forced_program + selected_file = all_programs[forced_program] + deferred = [p["name"] for p in due if p["name"] != forced_program] + selected_issue = ( + issue_programs[selected]["issue_number"] if selected in issue_programs else None + ) + selected_target_metric = None + for p in due: + if p["name"] == forced_program: + selected_target_metric = p.get("target_metric") + break + if selected_target_metric is None: + selected_target_metric = _parse_target_metric_from_file(selected_file) + return selected, selected_file, selected_issue, selected_target_metric, deferred, None + + if due: + # Normal scheduling: pick the single most-overdue program. + # ``last_run`` of None/empty sorts first (never run). 
+ due_sorted = sorted(due, key=lambda p: p["last_run"] or "") + selected = due_sorted[0]["name"] + selected_file = due_sorted[0]["file"] + selected_target_metric = due_sorted[0].get("target_metric") + deferred = [p["name"] for p in due_sorted[1:]] + selected_issue = ( + issue_programs[selected]["issue_number"] if selected in issue_programs else None + ) + return selected, selected_file, selected_issue, selected_target_metric, deferred, None + + return None, None, None, None, [], None + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + + +def main(): + github_token = os.environ.get("GITHUB_TOKEN", "") + repo = os.environ.get("GITHUB_REPOSITORY", "") + forced_program = os.environ.get("AUTOLOOP_PROGRAM", "").strip() + + _bootstrap_template_if_missing() + + # Find all program files from all locations: + # 1. Directory-based programs: .autoloop/programs//program.md (preferred) + # 2. Bare markdown programs: .autoloop/programs/.md (simple) + # 3. 
Issue-based programs: GitHub issues with the 'autoloop-program' label + program_files = [] + program_files.extend(_scan_directory_programs()) + program_files.extend(_scan_bare_programs()) + issue_files, issue_programs = _fetch_issue_programs(repo, github_token) + program_files.extend(issue_files) + + if not program_files: + # Fallback to single-file locations + for path in [".autoloop/program.md", "program.md"]: + if os.path.isfile(path): + program_files = [path] + break + + os.makedirs(OUTPUT_DIR, exist_ok=True) + + if not program_files: + print("NO_PROGRAMS_FOUND") + with open(OUTPUT_FILE, "w") as f: + json.dump( + { + "due": [], + "skipped": [], + "unconfigured": [], + "no_programs": True, + "head_branch": None, + "existing_pr": None, + }, + f, + ) + sys.exit(0) + + now = datetime.now(timezone.utc) + due = [] + skipped = [] + unconfigured = [] + all_programs = {} # name -> file path + + for pf in program_files: + name = get_program_name(pf) + all_programs[name] = pf + with open(pf) as f: + content = f.read() + + if is_unconfigured(content): + unconfigured.append(name) + continue + + schedule_delta, target_metric, invalid_target = parse_program_frontmatter(content) + if invalid_target is not None: + print(" Warning: {} has invalid target-metric value: {}".format(name, invalid_target)) + + # Read state from repo-memory + state = read_program_state(name) + if state: + print( + " {}: last_run={}, iteration_count={}".format( + name, state.get("last_run"), state.get("iteration_count") + ) + ) + else: + print(" {}: no state found (first run)".format(name)) + + last_run = None + lr = state.get("last_run") + if lr: + try: + last_run = datetime.fromisoformat(lr.replace("Z", "+00:00")) + except ValueError: + pass + + should_skip, reason = check_skip_conditions(state) + if should_skip: + skipped.append({"name": name, "reason": reason}) + continue + + # Check if due based on per-program schedule + if schedule_delta and last_run and now - last_run < schedule_delta: + 
skipped.append( + { + "name": name, + "reason": "not due yet", + "next_due": (last_run + schedule_delta).isoformat(), + } + ) + continue + + due.append({"name": name, "last_run": lr, "file": pf, "target_metric": target_metric}) + + selected, selected_file, selected_issue, selected_target_metric, deferred, error = ( + select_program(due, forced_program, all_programs, unconfigured, issue_programs) + ) + + if error: + print("ERROR: {}".format(error)) + sys.exit(1) + + if forced_program and selected: + print("FORCED: running program '{}' (manual dispatch)".format(forced_program)) + + # Look up the existing draft PR (if any) for the selected program, so the + # agent can enforce the single-PR-per-program invariant: never call + # create-pull-request when a PR for autoloop/{name} already exists. + # head_branch is always the canonical name (no suffix, no hash). + head_branch = None + existing_pr = None + if selected: + head_branch = "autoloop/{}".format(selected) + try: + existing_pr = find_existing_pr_for_branch(repo, selected, github_token) + except Exception as e: # noqa: BLE001 -- best-effort lookup + print(" Warning: existing PR lookup failed for {}: {}".format(selected, e)) + existing_pr = None + + result = { + "selected": selected, + "selected_file": selected_file, + "selected_issue": selected_issue, + "selected_target_metric": selected_target_metric, + "state_file_size_bytes": get_state_file_size(selected) if selected else 0, + "state_file_max_bytes": STATE_FILE_MAX_BYTES, + "issue_programs": { + name: info["issue_number"] for name, info in issue_programs.items() + }, + "deferred": deferred, + "skipped": skipped, + "unconfigured": unconfigured, + "no_programs": False, + "head_branch": head_branch, + "existing_pr": existing_pr, + } + + with open(OUTPUT_FILE, "w") as f: + json.dump(result, f, indent=2) + + print("=== Autoloop Program Check ===") + print("Selected program: {} ({})".format(selected or "(none)", selected_file or "n/a")) + print("Deferred (next run): 
{}".format(deferred or "(none)")) + print("Programs skipped: {}".format([s["name"] for s in skipped] or "(none)")) + print("Programs unconfigured: {}".format(unconfigured or "(none)")) + + if not selected and not unconfigured: + print("\nNo programs due this run. Exiting early.") + sys.exit(1) # Non-zero exit skips the agent step + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/sync-branches.lock.yml b/.github/workflows/sync-branches.lock.yml index 89d965b..6972f09 100644 --- a/.github/workflows/sync-branches.lock.yml +++ b/.github/workflows/sync-branches.lock.yml @@ -24,7 +24,7 @@ # Runs whenever the default branch changes and merges it into all active # autoloop/* branches so that program iterations always build on the latest code. # -# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"1ef97f2f3c4b1726702db78fc5e36ea72b0ee7cdf378cc88263a3d41d61b6ac7","compiler_version":"v0.65.6","strict":true,"agent_id":"copilot"} +# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"7347655448caf952972200853ace37356f693514cb8a4ae018797501b79c86a5","compiler_version":"v0.65.6","strict":true,"agent_id":"copilot"} name: "Sync Branches" "on": @@ -139,13 +139,13 @@ jobs: run: | bash ${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh { - cat << 'GH_AW_PROMPT_2f026df8ba761c4c_EOF' + cat << 'GH_AW_PROMPT_4db17a6c15417a22_EOF' - GH_AW_PROMPT_2f026df8ba761c4c_EOF + GH_AW_PROMPT_4db17a6c15417a22_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md" cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md" cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md" - cat << 'GH_AW_PROMPT_2f026df8ba761c4c_EOF' + cat << 'GH_AW_PROMPT_4db17a6c15417a22_EOF' The following GitHub context information is available for this workflow: {{#if __GH_AW_GITHUB_ACTOR__ }} @@ -174,12 +174,12 @@ jobs: {{/if}} - GH_AW_PROMPT_2f026df8ba761c4c_EOF + GH_AW_PROMPT_4db17a6c15417a22_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_prompt.md" - cat << 'GH_AW_PROMPT_2f026df8ba761c4c_EOF' + 
cat << 'GH_AW_PROMPT_4db17a6c15417a22_EOF' {{#runtime-import .github/workflows/sync-branches.md}} - GH_AW_PROMPT_2f026df8ba761c4c_EOF + GH_AW_PROMPT_4db17a6c15417a22_EOF } > "$GH_AW_PROMPT" - name: Interpolate variables and render templates uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 @@ -289,7 +289,7 @@ jobs: DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} GITHUB_REPOSITORY: ${{ github.repository }} name: Merge default branch into all autoloop program branches - run: "python3 - << 'PYEOF'\nimport os, subprocess, sys\n\ntoken = os.environ.get(\"GITHUB_TOKEN\", \"\")\nrepo = os.environ.get(\"GITHUB_REPOSITORY\", \"\")\ndefault_branch = os.environ.get(\"DEFAULT_BRANCH\", \"main\")\n\n# List all remote branches matching the autoloop/* pattern\nresult = subprocess.run(\n [\"git\", \"branch\", \"-r\", \"--list\", \"origin/autoloop/*\"],\n capture_output=True, text=True\n)\nif result.returncode != 0:\n print(f\"Failed to list remote branches: {result.stderr}\")\n sys.exit(0)\n\nbranches = [b.strip().replace(\"origin/\", \"\") for b in result.stdout.strip().split(\"\\n\") if b.strip()]\n\nif not branches:\n print(\"No autoloop/* branches found. 
Nothing to sync.\")\n sys.exit(0)\n\nprint(f\"Found {len(branches)} autoloop branch(es) to sync: {branches}\")\n\nfailed = []\nfor branch in branches:\n print(f\"\\n--- Syncing {branch} with {default_branch} ---\")\n\n # Fetch both branches\n subprocess.run([\"git\", \"fetch\", \"origin\", branch], capture_output=True)\n subprocess.run([\"git\", \"fetch\", \"origin\", default_branch], capture_output=True)\n\n # Check out the program branch\n checkout = subprocess.run(\n [\"git\", \"checkout\", branch],\n capture_output=True, text=True\n )\n if checkout.returncode != 0:\n # Try creating a local tracking branch\n checkout = subprocess.run(\n [\"git\", \"checkout\", \"-b\", branch, f\"origin/{branch}\"],\n capture_output=True, text=True\n )\n if checkout.returncode != 0:\n print(f\" Failed to checkout {branch}: {checkout.stderr}\")\n failed.append(branch)\n continue\n\n # Merge the default branch into the program branch\n merge = subprocess.run(\n [\"git\", \"merge\", f\"origin/{default_branch}\", \"--no-edit\",\n \"-m\", f\"Merge {default_branch} into {branch}\"],\n capture_output=True, text=True\n )\n if merge.returncode != 0:\n print(f\" Merge conflict or failure for {branch}: {merge.stderr}\")\n # Abort the merge to leave a clean state\n subprocess.run([\"git\", \"merge\", \"--abort\"], capture_output=True)\n failed.append(branch)\n continue\n\n # Push the updated branch\n push = subprocess.run(\n [\"git\", \"push\", \"origin\", branch],\n capture_output=True, text=True\n )\n if push.returncode != 0:\n print(f\" Failed to push {branch}: {push.stderr}\")\n failed.append(branch)\n continue\n\n print(f\" Successfully synced {branch}\")\n\n# Return to default branch\nsubprocess.run([\"git\", \"checkout\", default_branch], capture_output=True)\n\nif failed:\n print(f\"\\n⚠️ Failed to sync {len(failed)} branch(es): {failed}\")\n print(\"These branches may need manual conflict resolution.\")\n # Don't fail the workflow β€” log the issue but continue\nelse:\n 
print(f\"\\nβœ… All {len(branches)} branch(es) synced successfully.\")\nPYEOF" + run: "python3 - << 'PYEOF'\nimport os, subprocess, sys\n\ntoken = os.environ.get(\"GITHUB_TOKEN\", \"\")\nrepo = os.environ.get(\"GITHUB_REPOSITORY\", \"\")\ndefault_branch = os.environ.get(\"DEFAULT_BRANCH\", \"main\")\n\n# List all remote branches matching the autoloop/* pattern\nresult = subprocess.run(\n [\"git\", \"branch\", \"-r\", \"--list\", \"origin/autoloop/*\"],\n capture_output=True, text=True\n)\nif result.returncode != 0:\n print(f\"Failed to list remote branches: {result.stderr}\")\n sys.exit(0)\n\nbranches = [b.strip().replace(\"origin/\", \"\") for b in result.stdout.strip().split(\"\\n\") if b.strip()]\n\nif not branches:\n print(\"No autoloop/* branches found. Nothing to sync.\")\n sys.exit(0)\n\nprint(f\"Found {len(branches)} autoloop branch(es) to sync: {branches}\")\n\ndef rev_count(range_spec):\n r = subprocess.run(\n [\"git\", \"rev-list\", \"--count\", range_spec],\n capture_output=True, text=True\n )\n if r.returncode != 0:\n return None\n try:\n return int(r.stdout.strip())\n except ValueError:\n return None\n\nfailed = []\nfor branch in branches:\n print(f\"\\n--- Syncing {branch} with {default_branch} ---\")\n\n # Fetch both branches so the ahead/behind counts below are computed\n # against up-to-date local copies of the remote tips.\n subprocess.run([\"git\", \"fetch\", \"origin\", branch], capture_output=True)\n subprocess.run([\"git\", \"fetch\", \"origin\", default_branch], capture_output=True)\n\n # Compute ahead/behind counts using the remote-tracking refs so we\n # make a decision based on commit delta (not content delta).\n ahead = rev_count(f\"origin/{default_branch}..origin/{branch}\")\n behind = rev_count(f\"origin/{branch}..origin/{default_branch}\")\n if ahead is None or behind is None:\n print(f\" Failed to compute ahead/behind for {branch}\")\n failed.append(branch)\n continue\n print(f\" ahead={ahead} behind={behind}\")\n\n if ahead == 0 and 
behind > 0:\n # All of the branch's commits are already in the default branch.\n # Merging would produce a noisy \"Merge main into branch\" commit\n # that re-exposes every historical file as a patch touch β€” the\n # failure mode that triggers gh-aw's E003 (>100 files) when a\n # new PR is opened. Fast-forward the canonical branch instead.\n # This is lossless because ahead=0 proves every commit on the\n # branch is already reachable from the default branch.\n ff = subprocess.run(\n [\"git\", \"checkout\", \"-B\", branch, f\"origin/{default_branch}\"],\n capture_output=True, text=True\n )\n if ff.returncode != 0:\n print(f\" Failed to fast-forward {branch}: {ff.stderr}\")\n failed.append(branch)\n continue\n # Use --force-with-lease so that if anyone else is simultaneously\n # pushing to the branch, the update is rejected rather than\n # overwriting their commits.\n push = subprocess.run(\n [\"git\", \"push\", \"--force-with-lease\", \"origin\", branch],\n capture_output=True, text=True\n )\n if push.returncode != 0:\n print(f\" Failed to force-push {branch}: {push.stderr}\")\n failed.append(branch)\n continue\n print(f\" Fast-forwarded {branch} to origin/{default_branch}\")\n continue\n\n if ahead == 0 and behind == 0:\n # Already at default branch β€” nothing to do.\n print(f\" {branch} is already up to date with origin/{default_branch}\")\n continue\n\n if ahead > 0 and behind == 0:\n # Unique work preserved; no upstream drift to merge.\n print(f\" {branch} is ahead of origin/{default_branch} with no upstream drift; nothing to merge.\")\n continue\n\n # True divergence (ahead > 0 and behind > 0): check out and merge.\n checkout = subprocess.run(\n [\"git\", \"checkout\", \"-B\", branch, f\"origin/{branch}\"],\n capture_output=True, text=True\n )\n if checkout.returncode != 0:\n print(f\" Failed to checkout {branch}: {checkout.stderr}\")\n failed.append(branch)\n continue\n\n # Merge the default branch into the program branch\n merge = subprocess.run(\n 
[\"git\", \"merge\", f\"origin/{default_branch}\", \"--no-edit\",\n \"-m\", f\"Merge {default_branch} into {branch}\"],\n capture_output=True, text=True\n )\n if merge.returncode != 0:\n print(f\" Merge conflict or failure for {branch}: {merge.stderr}\")\n # Abort the merge to leave a clean state\n subprocess.run([\"git\", \"merge\", \"--abort\"], capture_output=True)\n failed.append(branch)\n continue\n\n # Push the updated branch\n push = subprocess.run(\n [\"git\", \"push\", \"origin\", branch],\n capture_output=True, text=True\n )\n if push.returncode != 0:\n print(f\" Failed to push {branch}: {push.stderr}\")\n failed.append(branch)\n continue\n\n print(f\" Successfully synced {branch}\")\n\n# Return to default branch\nsubprocess.run([\"git\", \"checkout\", default_branch], capture_output=True)\n\nif failed:\n print(f\"\\n⚠️ Failed to sync {len(failed)} branch(es): {failed}\")\n print(\"These branches may need manual conflict resolution.\")\n # Don't fail the workflow β€” log the issue but continue\nelse:\n print(f\"\\nβœ… All {len(branches)} branch(es) synced successfully.\")\nPYEOF" - name: Checkout PR branch id: checkout-pr if: | @@ -345,7 +345,7 @@ jobs: export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e 
GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.11' mkdir -p /home/runner/.copilot - cat << GH_AW_MCP_CONFIG_cf6e06fbb22b66a8_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh + cat << GH_AW_MCP_CONFIG_c44c901ef7ee68bd_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh { "mcpServers": { "github": { @@ -372,7 +372,7 @@ jobs: "payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}" } } - GH_AW_MCP_CONFIG_cf6e06fbb22b66a8_EOF + GH_AW_MCP_CONFIG_c44c901ef7ee68bd_EOF - name: Download activation artifact uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: diff --git a/.github/workflows/sync-branches.md b/.github/workflows/sync-branches.md index a45df2d..3d87064 100644 --- a/.github/workflows/sync-branches.md +++ b/.github/workflows/sync-branches.md @@ -48,25 +48,82 @@ steps: print(f"Found {len(branches)} autoloop branch(es) to sync: {branches}") + def rev_count(range_spec): + r = subprocess.run( + ["git", "rev-list", "--count", range_spec], + capture_output=True, text=True + ) + if r.returncode != 0: + return None + try: + return int(r.stdout.strip()) + except ValueError: + return None + failed = [] for branch in branches: print(f"\n--- Syncing {branch} with {default_branch} ---") - # Fetch both branches + # Fetch both branches so the ahead/behind counts below are computed + # against up-to-date local copies of the remote tips. 
subprocess.run(["git", "fetch", "origin", branch], capture_output=True) subprocess.run(["git", "fetch", "origin", default_branch], capture_output=True) - # Check out the program branch + # Compute ahead/behind counts using the remote-tracking refs so we + # make a decision based on commit delta (not content delta). + ahead = rev_count(f"origin/{default_branch}..origin/{branch}") + behind = rev_count(f"origin/{branch}..origin/{default_branch}") + if ahead is None or behind is None: + print(f" Failed to compute ahead/behind for {branch}") + failed.append(branch) + continue + print(f" ahead={ahead} behind={behind}") + + if ahead == 0 and behind > 0: + # All of the branch's commits are already in the default branch. + # Merging would produce a noisy "Merge main into branch" commit + # that re-exposes every historical file as a patch touch β€” the + # failure mode that triggers gh-aw's E003 (>100 files) when a + # new PR is opened. Fast-forward the canonical branch instead. + # This is lossless because ahead=0 proves every commit on the + # branch is already reachable from the default branch. + ff = subprocess.run( + ["git", "checkout", "-B", branch, f"origin/{default_branch}"], + capture_output=True, text=True + ) + if ff.returncode != 0: + print(f" Failed to fast-forward {branch}: {ff.stderr}") + failed.append(branch) + continue + # Use --force-with-lease so that if anyone else is simultaneously + # pushing to the branch, the update is rejected rather than + # overwriting their commits. + push = subprocess.run( + ["git", "push", "--force-with-lease", "origin", branch], + capture_output=True, text=True + ) + if push.returncode != 0: + print(f" Failed to force-push {branch}: {push.stderr}") + failed.append(branch) + continue + print(f" Fast-forwarded {branch} to origin/{default_branch}") + continue + + if ahead == 0 and behind == 0: + # Already at default branch β€” nothing to do. 
+ print(f" {branch} is already up to date with origin/{default_branch}") + continue + + if ahead > 0 and behind == 0: + # Unique work preserved; no upstream drift to merge. + print(f" {branch} is ahead of origin/{default_branch} with no upstream drift; nothing to merge.") + continue + + # True divergence (ahead > 0 and behind > 0): check out and merge. checkout = subprocess.run( - ["git", "checkout", branch], + ["git", "checkout", "-B", branch, f"origin/{branch}"], capture_output=True, text=True ) - if checkout.returncode != 0: - # Try creating a local tracking branch - checkout = subprocess.run( - ["git", "checkout", "-b", branch, f"origin/{branch}"], - capture_output=True, text=True - ) if checkout.returncode != 0: print(f" Failed to checkout {branch}: {checkout.stderr}") failed.append(branch) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 21782d1..8241b7d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,5 +13,5 @@ jobs: - uses: actions/setup-python@v5 with: python-version: "3.12" - - run: pip install pytest + - run: pip install pytest pyyaml - run: pytest tests/ -v diff --git a/AGENTS.md b/AGENTS.md index 80b2056..63d0475 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -12,8 +12,10 @@ autoloop/ β”œβ”€β”€ workflows/ ← Agentic Workflow definitions β”‚ β”œβ”€β”€ autoloop.md ← main autoloop workflow (compiled by gh-aw) β”‚ β”œβ”€β”€ sync-branches.md ← syncs default branch into autoloop/* branches -β”‚ └── shared/ ← shared workflow fragments -β”‚ └── reporting.md +β”‚ β”œβ”€β”€ shared/ ← shared workflow fragments +β”‚ β”‚ └── reporting.md +β”‚ └── scripts/ ← standalone scripts invoked from steps +β”‚ └── autoloop_scheduler.py ← scheduler (see workflows/autoloop.md) β”œβ”€β”€ .autoloop/ β”‚ └── programs/ ← research programs (directory-based) β”‚ β”œβ”€β”€ function_minimization/ @@ -89,13 +91,13 @@ Programs can include an Evolution Strategy section (inspired by OpenEvolve) that - Evaluation commands must output 
JSON with a numeric metric - Each program has a single **long-running branch** named `autoloop/` that accumulates all accepted iterations - A single **draft PR** per program is created on the first accepted iteration and accumulates subsequent commits -- A **steering issue** per program (`[Autoloop: ] Steering`) links the branch, PR, and state together +- A single **program issue** per program (`[Autoloop: ]`, labeled `autoloop-program`) is the single source of truth for the program — it hosts the status comment, per-iteration comments, and human steering. For issue-based programs this is the source issue; for file-based programs it is auto-created on the first run. - All state lives in repo-memory — per-program state files on the `memory/autoloop` branch are the single source of truth for both scheduling/machine state and human-readable research context - State files: `.md` on the `memory/autoloop` branch (per-program with Machine State table + research sections) -- Experiment history is tracked in the state file's Iteration History section and via per-run comments on the source issue (for issue-based programs) +- Experiment history is tracked in the state file's Iteration History section and via per-iteration comments on the program issue - The default branch is automatically merged into all `autoloop/*` branches whenever it changes - Issue-based programs are discovered via the `autoloop-program` label; the issue body is the program definition -- For issue-based programs, a status comment (marked with ``) is maintained on the source issue, and a per-run comment is posted after each iteration +- A status comment (marked with ``) is maintained on every program issue (the earliest bot comment, edited in place each iteration), and a per-iteration comment is posted after each iteration - Programs can be **open-ended** (run indefinitely) or **goal-oriented** (run until `target-metric` in frontmatter is reached).
When a goal-oriented program completes, the `autoloop-program` label is removed and `autoloop-completed` is added (for issue-based programs) - When proposing a new program, always clarify whether it is open-ended or goal-oriented @@ -134,6 +136,7 @@ To deploy the workflow to a repository: 1. Copy `workflows/autoloop.md` to `.github/workflows/autoloop.md` in the target repo 2. Copy `workflows/sync-branches.md` to `.github/workflows/sync-branches.md` in the target repo 3. Copy `workflows/shared/` to `.github/workflows/shared/` in the target repo -4. Run `gh aw compile autoloop` and `gh aw compile sync-branches` to generate the lock files -5. Copy program directories to `.autoloop/programs/` in the target repo -6. Commit and push +4. Copy `workflows/scripts/` to `.github/workflows/scripts/` in the target repo +5. Run `gh aw compile autoloop` and `gh aw compile sync-branches` to generate the lock files +6. Copy program directories to `.autoloop/programs/` in the target repo +7. Commit and push diff --git a/tests/conftest.py b/tests/conftest.py index e23352c..0279751 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,117 +1,36 @@ -""" -Extract scheduling functions directly from the workflow pre-step heredoc. - -Instead of duplicating the workflow's JavaScript code in a separate module, we parse -workflows/autoloop.md, extract the JavaScript heredoc, write the function definitions -to a temp CommonJS module, and call them via Node.js subprocess. +"""Test fixtures for the standalone Autoloop scheduler. -This ensures tests always run against the actual workflow code. +The scheduler logic lives in ``workflows/scripts/autoloop_scheduler.py`` and is +also distributed at ``.github/workflows/scripts/autoloop_scheduler.py`` (the +dogfooded deploy copy). Tests import the source module directly via importlib. 
""" -import json +import importlib.util import os -import re -import subprocess -import tempfile -from datetime import timedelta - -WORKFLOW_PATH = os.path.join(os.path.dirname(__file__), "..", "workflows", "autoloop.md") - -# Path to the extracted JS module -_JS_MODULE_PATH = os.path.join(tempfile.gettempdir(), "autoloop_test_functions.cjs") - - -def _load_workflow_functions(): - """Parse workflows/autoloop.md and extract JS function defs from the pre-step.""" - with open(WORKFLOW_PATH) as f: - content = f.read() - - # Extract the JavaScript heredoc between JSEOF markers - m = re.search(r"node - << 'JSEOF'\n(.*?)\n\s*JSEOF", content, re.DOTALL) - assert m, "Could not find JSEOF heredoc in workflows/autoloop.md" - source = m.group(1) - - # Extract function definitions: everything up to the main() async function. - # Functions are defined before 'async function main()' - lines = source.split("\n") - func_lines = [] - for line in lines: - if line.strip().startswith("async function main"): - break - func_lines.append(line) - - func_source = "\n".join(func_lines) - - # Write to a temp .cjs file with module.exports - with open(_JS_MODULE_PATH, "w") as f: - f.write(func_source) - f.write( - "\n\nmodule.exports = " - "{ parseMachineState, parseSchedule, getProgramName, readProgramState, parseLinkHeader };\n" - ) - - return True - - -def _call_js(func_name, *args): - """Call a JS function from the extracted workflow module and return the result.""" - args_json = json.dumps(list(args)) - escaped_path = json.dumps(_JS_MODULE_PATH) - script = ( - "const m = require(" + escaped_path + ");\n" - "const result = m." + func_name + "(..." + args_json + ");\n" - "process.stdout.write(JSON.stringify(result === undefined ? null : result));\n" - ) - result = subprocess.run( - ["node", "-e", script], - capture_output=True, - text=True, - timeout=10, +import sys + +# Path to the standalone scheduler script (source-of-truth lives in workflows/). 
+SCHEDULER_PATH = os.path.normpath( + os.path.join( + os.path.dirname(__file__), + "..", + "workflows", + "scripts", + "autoloop_scheduler.py", ) - if result.returncode != 0: - raise RuntimeError("Node.js error calling " + func_name + ": " + result.stderr) - if not result.stdout.strip(): - return None - return json.loads(result.stdout) - - -# Initialize at import time -_load_workflow_functions() - - -def _parse_schedule_wrapper(s): - """Python wrapper for JS parseSchedule. Converts milliseconds to timedelta.""" - ms = _call_js("parseSchedule", s) - if ms is None: - return None - return timedelta(milliseconds=ms) +) - -def _parse_machine_state_wrapper(content): - """Python wrapper for JS parseMachineState.""" - return _call_js("parseMachineState", content) - - -def _get_program_name_wrapper(pf): - """Python wrapper for JS getProgramName.""" - return _call_js("getProgramName", pf) +_spec = importlib.util.spec_from_file_location("autoloop_scheduler", SCHEDULER_PATH) +autoloop_scheduler = importlib.util.module_from_spec(_spec) +sys.modules["autoloop_scheduler"] = autoloop_scheduler +_spec.loader.exec_module(autoloop_scheduler) +# Backwards-compatible function map (mirrors the previous JS-extracting conftest). _funcs = { - "parse_schedule": _parse_schedule_wrapper, - "parse_machine_state": _parse_machine_state_wrapper, - "get_program_name": _get_program_name_wrapper, - "read_program_state": lambda name: _call_js("readProgramState", name), - "parse_link_header": lambda header: _call_js("parseLinkHeader", header), + "parse_schedule": autoloop_scheduler.parse_schedule, + "parse_machine_state": autoloop_scheduler.parse_machine_state, + "get_program_name": autoloop_scheduler.get_program_name, + "read_program_state": autoloop_scheduler.read_program_state, + "parse_link_header": autoloop_scheduler.parse_link_header, } - - -def _extract_inline_pattern(name): - """Extract the JavaScript heredoc source from the workflow. 
- - This is a helper for inspecting the full inline source if needed. - """ - with open(WORKFLOW_PATH) as f: - content = f.read() - m = re.search(r"node - << 'JSEOF'\n(.*?)\n\s*JSEOF", content, re.DOTALL) - return m.group(1) if m else "" diff --git a/tests/test_scheduler_e2e.py b/tests/test_scheduler_e2e.py new file mode 100644 index 0000000..758cdf6 --- /dev/null +++ b/tests/test_scheduler_e2e.py @@ -0,0 +1,280 @@ +"""End-to-end fixture tests for the standalone Autoloop scheduler. + +These tests run ``workflows/scripts/autoloop_scheduler.py`` as a subprocess in +isolated temp directories and validate the resulting ``autoloop.json``. They +cover the scenarios called out in the extraction issue: + +* most-overdue selection (``last_run`` tie-break) +* missing state file β†’ first run +* ``paused: true`` β†’ skipped with reason +* ``completed: true`` β†’ skipped +* ``AUTOLOOP_PROGRAM=`` β†’ forced selection bypasses scheduling +* No programs found β†’ ``no_programs: true`` + +The scheduler talks to the GitHub issues API; tests point ``GITHUB_REPOSITORY`` +at a non-resolvable host so the request fails fast, falling back to the +filesystem-discovered programs only (the script logs a warning and continues β€” +the same behaviour exercised in the workflow when issues are absent). +""" + +import json +import os +import shutil +import subprocess +import sys +import tempfile +import textwrap + +import pytest + +from conftest import SCHEDULER_PATH + +PROGRAM_TEMPLATE = textwrap.dedent("""\ + --- + schedule: every 6h + --- + + # {name} + + ## Goal + Optimize {name}. + + ## Target + - file.py + + ## Evaluation + ```bash + python eval.py + ``` + + The metric is `score`. Higher is better. 
+""") + + +def _state_file(name, *, last_run=None, paused=False, completed=False, pause_reason=None): + """Render a minimal repo-memory state file for a program.""" + rows = [ + ("Last Run", last_run if last_run else "β€”"), + ("Iteration Count", "0"), + ("Best Metric", "β€”"), + ("Target Metric", "β€”"), + ("Paused", "true" if paused else "false"), + ("Pause Reason", pause_reason or "β€”"), + ("Completed", "true" if completed else "false"), + ("Completed Reason", "β€”"), + ("Consecutive Errors", "0"), + ("Recent Statuses", "β€”"), + ] + body = "\n".join("| {} | {} |".format(k, v) for k, v in rows) + return textwrap.dedent("""\ + # Autoloop: {name} + + ## βš™οΈ Machine State + + | Field | Value | + |-------|-------| + {body} + """).format(name=name, body=body) + + +def _run_scheduler(workdir, *, forced=None, repo="bogus.invalid/bogus"): + """Run the scheduler in ``workdir`` and return ``(returncode, autoloop_json)``. + + ``GITHUB_REPOSITORY`` defaults to a bogus DNS name so the issues fetch fails + instantly (DNS lookup error β†’ caught, scheduler continues with filesystem + programs only). ``HOME`` is also rewritten so any state under ``/tmp/gh-aw`` + is owned by the test. + """ + env = os.environ.copy() + env["GITHUB_TOKEN"] = "dummy" + env["GITHUB_REPOSITORY"] = repo + if forced is not None: + env["AUTOLOOP_PROGRAM"] = forced + else: + env.pop("AUTOLOOP_PROGRAM", None) + + # The scheduler always writes /tmp/gh-aw/autoloop.json; isolate via TMPDIR + # so concurrent tests don't clobber each other. + tmproot = os.path.join(workdir, "_tmp") + os.makedirs(tmproot, exist_ok=True) + env["TMPDIR"] = tmproot + + # The scheduler always writes /tmp/gh-aw/autoloop.json. We can't redirect + # this via env vars without changing the script's contract, so tests share + # that path and clean up the previous run's output before invoking again. 
+ out_path = "/tmp/gh-aw/autoloop.json" + if os.path.exists(out_path): + os.remove(out_path) + + proc = subprocess.run( + [sys.executable, SCHEDULER_PATH], + cwd=workdir, + env=env, + capture_output=True, + text=True, + timeout=30, + ) + + autoloop = None + if os.path.exists(out_path): + with open(out_path) as f: + autoloop = json.load(f) + return proc, autoloop + + +@pytest.fixture +def workdir(tmp_path, monkeypatch): + """Return an isolated workdir with ``.autoloop/programs/`` ready and + a fresh repo-memory directory the scheduler can read.""" + monkeypatch.chdir(tmp_path) + (tmp_path / ".autoloop" / "programs").mkdir(parents=True) + # The scheduler reads state from /tmp/gh-aw/repo-memory/autoloop. Clean it + # so each test starts from a known empty slate, then re-populate per-test. + repo_mem = "/tmp/gh-aw/repo-memory/autoloop" + if os.path.isdir(repo_mem): + shutil.rmtree(repo_mem) + os.makedirs(repo_mem, exist_ok=True) + return tmp_path + + +def _write_program(workdir, name, body=None): + p = workdir / ".autoloop" / "programs" / "{}.md".format(name) + p.write_text(body if body is not None else PROGRAM_TEMPLATE.format(name=name)) + return p + + +def _write_state(name, **kwargs): + repo_mem = "/tmp/gh-aw/repo-memory/autoloop" + os.makedirs(repo_mem, exist_ok=True) + with open(os.path.join(repo_mem, "{}.md".format(name)), "w") as f: + f.write(_state_file(name, **kwargs)) + + +# --------------------------------------------------------------------------- +# Scenario coverage +# --------------------------------------------------------------------------- + + +class TestSchedulerEndToEnd: + def test_picks_more_overdue(self, workdir): + """Two programs with different ``last_run`` β†’ the older one is selected.""" + _write_program(workdir, "old") + _write_program(workdir, "fresh") + _write_state("old", last_run="2025-01-01T00:00:00Z") + _write_state("fresh", last_run="2025-01-15T00:00:00Z") + + proc, out = _run_scheduler(str(workdir)) + assert proc.returncode == 0, 
proc.stderr + assert out["selected"] == "old" + assert out["deferred"] == ["fresh"] + + def test_never_run_beats_recently_run(self, workdir): + """A never-run program is always more overdue than one with state.""" + _write_program(workdir, "veteran") + _write_program(workdir, "rookie") + _write_state("veteran", last_run="2025-01-15T00:00:00Z") + # No state file for "rookie" β†’ first run + + proc, out = _run_scheduler(str(workdir)) + assert proc.returncode == 0, proc.stderr + assert out["selected"] == "rookie" + + def test_missing_state_file_treated_as_first_run(self, workdir): + """A single program with no state file is selected and treated as first run.""" + _write_program(workdir, "lonely") + proc, out = _run_scheduler(str(workdir)) + assert proc.returncode == 0, proc.stderr + assert out["selected"] == "lonely" + assert "no state file found (first run)" in proc.stdout + + def test_paused_program_is_skipped(self, workdir): + """``paused: true`` puts the program in ``skipped`` with a paused reason.""" + _write_program(workdir, "snoozer") + _write_state("snoozer", paused=True, pause_reason="manual") + + proc, out = _run_scheduler(str(workdir)) + # Only one program and it's paused β†’ nothing due β†’ exit 1 + assert proc.returncode == 1 + names = [s["name"] for s in out["skipped"]] + assert "snoozer" in names + reason = next(s["reason"] for s in out["skipped"] if s["name"] == "snoozer") + assert reason.startswith("paused:") + assert "manual" in reason + + def test_completed_program_is_skipped(self, workdir): + """``completed: true`` puts the program in ``skipped``.""" + _write_program(workdir, "graduated") + _write_state("graduated", completed=True) + + proc, out = _run_scheduler(str(workdir)) + assert proc.returncode == 1 + names = [s["name"] for s in out["skipped"]] + assert "graduated" in names + reason = next(s["reason"] for s in out["skipped"] if s["name"] == "graduated") + assert "completed" in reason + + def test_forced_program_bypasses_scheduling(self, 
workdir): + """``AUTOLOOP_PROGRAM`` forces the named program even if not most-overdue.""" + _write_program(workdir, "old") + _write_program(workdir, "fresh") + _write_state("old", last_run="2025-01-01T00:00:00Z") + _write_state("fresh", last_run="2025-01-15T00:00:00Z") + + # Without forcing, "old" wins; with forcing "fresh" wins. + proc, out = _run_scheduler(str(workdir), forced="fresh") + assert proc.returncode == 0, proc.stderr + assert out["selected"] == "fresh" + assert out["deferred"] == ["old"] + assert "FORCED: running program 'fresh'" in proc.stdout + + def test_forced_program_can_run_paused(self, workdir): + """Forcing a paused program bypasses the skip and selects it anyway.""" + _write_program(workdir, "snoozer") + _write_state("snoozer", paused=True, pause_reason="manual") + + proc, out = _run_scheduler(str(workdir), forced="snoozer") + assert proc.returncode == 0, proc.stderr + assert out["selected"] == "snoozer" + + def test_forced_program_unknown_errors(self, workdir): + """Forcing an unknown program exits non-zero with an error.""" + _write_program(workdir, "real") + proc, _ = _run_scheduler(str(workdir), forced="nonexistent") + assert proc.returncode == 1 + assert "not found" in proc.stdout + + def test_no_programs_found(self, workdir): + """Empty programs dir β†’ ``no_programs: true``, exit 0 (workflow handles bootstrap).""" + # Remove the bootstrapped programs dir so the scheduler has nothing to + # discover after its bootstrap step (which only creates the dir if it's + # missing entirely). + shutil.rmtree(workdir / ".autoloop" / "programs") + proc, out = _run_scheduler(str(workdir)) + # The bootstrap recreates the dir + example template (which contains + # REPLACE placeholders β†’ unconfigured), so there is one unconfigured + # program. Exit 0 because the workflow still wants to surface the + # template via the agent step. 
+ assert proc.returncode == 0, proc.stderr + assert out["unconfigured"] == ["example"] + assert out["selected"] is None + + def test_head_branch_set_when_program_selected(self, workdir): + """``head_branch`` is exactly ``autoloop/{name}`` for the selected program.""" + _write_program(workdir, "coverage") + proc, out = _run_scheduler(str(workdir)) + assert proc.returncode == 0, proc.stderr + assert out["selected"] == "coverage" + assert out["head_branch"] == "autoloop/coverage" + # The bogus DNS repo means the PR API call fails β†’ existing_pr is None. + assert out["existing_pr"] is None + + def test_head_branch_null_when_nothing_selected(self, workdir): + """When no program is due, ``head_branch`` is ``null`` in the output.""" + # Empty programs dir β†’ bootstrap creates an unconfigured template, which + # does NOT count as selected. So head_branch should be null. + shutil.rmtree(workdir / ".autoloop" / "programs") + proc, out = _run_scheduler(str(workdir)) + assert proc.returncode == 0, proc.stderr + assert out["selected"] is None + assert out["head_branch"] is None + assert out["existing_pr"] is None diff --git a/tests/test_scheduling.py b/tests/test_scheduling.py index 4944c3c..7064391 100644 --- a/tests/test_scheduling.py +++ b/tests/test_scheduling.py @@ -1,133 +1,57 @@ -"""Tests for the scheduling pre-step in workflows/autoloop.md. +"""Tests for the standalone Autoloop scheduler. -Functions are extracted directly from the workflow JavaScript heredoc at import -time (see conftest.py) and called via Node.js subprocess β€” there is no separate -copy of the scheduling code. - -For inline logic (slugify, frontmatter parsing, skip conditions, etc.) that -isn't wrapped in a named function in the workflow, we write thin test helpers -that replicate the exact inline pattern. These are documented with the -workflow source patterns they correspond to. +The scheduler module is imported directly (see ``conftest.py``); functions are +exercised in-process. 
A few thin helpers below match the legacy 2-tuple/no-args +shapes used by the tests, while delegating to the shared scheduler module. """ import re from datetime import datetime, timezone, timedelta -from conftest import _funcs +from conftest import _funcs, autoloop_scheduler # --------------------------------------------------------------------------- -# Functions extracted from the workflow via AST (see conftest.py) +# Functions exposed by the scheduler module # --------------------------------------------------------------------------- parse_schedule = _funcs["parse_schedule"] parse_machine_state = _funcs["parse_machine_state"] get_program_name = _funcs["get_program_name"] parse_link_header = _funcs["parse_link_header"] +is_unconfigured = autoloop_scheduler.is_unconfigured +check_skip_conditions = autoloop_scheduler.check_skip_conditions +select_program = autoloop_scheduler.select_program # --------------------------------------------------------------------------- -# Thin helpers that replicate inline workflow patterns (not function defs). -# Each documents the workflow source lines it mirrors. +# Thin helpers preserving the legacy test-helper shapes. # --------------------------------------------------------------------------- def slugify_issue_title(title): - """Replicates the inline slug logic in the workflow's issue scanning section.""" - slug = re.sub(r'[^a-z0-9]+', '-', title.lower()).strip('-') + """Slugify a title (the workflow's inline issue-scanning slug logic). + + The scheduler module's ``slugify_issue_title`` falls back to ``"issue"`` + when no number is provided and the title slugifies to empty; the original + inline workflow code only fell back when ``number`` was known. This helper + preserves the original behaviour by passing through an empty string. 
+ """ + slug = re.sub(r'[^a-z0-9]+', '-', (title or '').lower()).strip('-') slug = re.sub(r'-+', '-', slug) return slug def parse_frontmatter(content): - """Replicates the inline frontmatter parsing in the workflow's program scanning loop.""" - content_stripped = re.sub(r'^(\s*\s*\n)*', '', content, flags=re.DOTALL) - schedule_delta = None - target_metric = None - fm_match = re.match(r"^---\s*\n(.*?)\n---\s*\n", content_stripped, re.DOTALL) - if fm_match: - for line in fm_match.group(1).split("\n"): - if line.strip().startswith("schedule:"): - schedule_str = line.split(":", 1)[1].strip() - schedule_delta = parse_schedule(schedule_str) - if line.strip().startswith("target-metric:"): - try: - target_metric = float(line.split(":", 1)[1].strip()) - except (ValueError, TypeError): - pass + """Two-tuple wrapper over the scheduler's three-tuple frontmatter parser.""" + schedule_delta, target_metric, _ = autoloop_scheduler.parse_program_frontmatter(content) return schedule_delta, target_metric -def is_unconfigured(content): - """Replicates the inline unconfigured check in the workflow's program scanning loop.""" - if "" in content: - return True - if re.search(r'\bTODO\b|\bREPLACE', content): - return True - return False - - -def check_skip_conditions(state): - """Replicates the inline skip logic in the workflow's program scanning loop. - - Returns (should_skip, reason). 
- """ - # Line 348: completed check - if str(state.get("completed", "")).lower() == "true" or state.get("completed") is True: - return True, "completed: target metric reached" - # Line 353: paused check - if state.get("paused"): - return True, f"paused: {state.get('pause_reason', 'unknown')}" - # Lines 357-361: plateau check - recent = state.get("recent_statuses", [])[-5:] - if len(recent) >= 5 and all(s == "rejected" for s in recent): - return True, "plateau: 5 consecutive rejections" - return False, None - - def check_if_due(schedule_delta, last_run, now): - """Replicates the inline due check in the workflow's program scanning loop. - - Returns (is_due, next_due_iso). - """ + """Replicates the inline due check: ``(is_due, next_due_iso_or_None)``.""" if schedule_delta and last_run: if now - last_run < schedule_delta: return False, (last_run + schedule_delta).isoformat() return True, None -def select_program(due, forced_program=None, all_programs=None, unconfigured=None, issue_programs=None): - """Replicates the selection logic in the workflow's program selection section. - - Returns (selected, selected_file, selected_issue, selected_target_metric, deferred, error). 
- """ - all_programs = all_programs or {} - unconfigured = unconfigured or [] - issue_programs = issue_programs or {} - - if forced_program: - if forced_program not in all_programs: - return None, None, None, None, [], f"program '{forced_program}' not found" - if forced_program in unconfigured: - return None, None, None, None, [], f"program '{forced_program}' is unconfigured" - selected = forced_program - selected_file = all_programs[forced_program] - deferred = [p["name"] for p in due if p["name"] != forced_program] - selected_issue = issue_programs.get(selected) - selected_target_metric = None - for p in due: - if p["name"] == forced_program: - selected_target_metric = p.get("target_metric") - break - return selected, selected_file, selected_issue, selected_target_metric, deferred, None - elif due: - due.sort(key=lambda p: p["last_run"] or "") - selected = due[0]["name"] - selected_file = due[0]["file"] - selected_target_metric = due[0].get("target_metric") - deferred = [p["name"] for p in due[1:]] - selected_issue = issue_programs.get(selected) - return selected, selected_file, selected_issue, selected_target_metric, deferred, None - - return None, None, None, None, [], None - - # =========================================================================== # Tests # =========================================================================== @@ -617,7 +541,7 @@ def test_forced_program_unconfigured(self): def test_forced_issue_program(self): due = [] all_progs = {"my-issue": "/tmp/gh-aw/issue-programs/my-issue.md"} - issue_progs = {"my-issue": 42} + issue_progs = {"my-issue": {"issue_number": 42, "file": "/tmp/x", "title": "X"}} selected, file, issue, target, deferred, err = select_program( due, forced_program="my-issue", all_programs=all_progs, issue_programs=issue_progs ) @@ -628,7 +552,7 @@ def test_issue_program_selected_normally(self): due = [ {"name": "my-issue", "last_run": None, "file": "/tmp/my-issue.md", "target_metric": None}, ] - issue_progs = 
{"my-issue": 7} + issue_progs = {"my-issue": {"issue_number": 7, "file": "/tmp/x", "title": "X"}} selected, file, issue, target, deferred, err = select_program( due, issue_programs=issue_progs ) @@ -786,18 +710,31 @@ def _load_steps(self): return step_names def _load_lock_steps(self): - """Return the list of step names from .github/workflows/sync-branches.lock.yml.""" + """Return the list of step names from the agent job in + .github/workflows/sync-branches.lock.yml. + + Parsed with a regex (rather than PyYAML) so the test has no + external dependencies beyond pytest. + """ import os - import yaml lock_path = os.path.join( os.path.dirname(__file__), "..", ".github", "workflows", "sync-branches.lock.yml" ) with open(lock_path) as f: - data = yaml.safe_load(f) - # Collect step names from the 'agent' job - steps = data.get("jobs", {}).get("agent", {}).get("steps", []) - return [s.get("name", "") for s in steps if s.get("name")] + content = f.read() + # Restrict to the 'agent:' job body so we don't pick up step names + # from other jobs (e.g. 'activation'). + agent_match = re.search(r"^ agent:\n((?: .*\n|\n)+)", content, re.MULTILINE) + if not agent_match: + return [] + agent_body = agent_match.group(1) + # Step names appear as either ' - name: ' or + # ' name: ' (when the step starts with '- env:'). + step_names = [] + for m in re.finditer(r'^\s{6,8}(?:- )?name:\s*(.+)$', agent_body, re.MULTILINE): + step_names.append(m.group(1).strip()) + return step_names def test_cred_step_exists(self): """A step that configures Git identity/auth must exist in the source.""" @@ -833,3 +770,208 @@ def test_lock_creds_before_merge(self): f"'Configure Git credentials' (index {cred_idx}) must come before " f"merge step (index {merge_idx}). 
Steps: {steps}" ) + + +# --------------------------------------------------------------------------- +# Single-PR-per-program invariant: safe-outputs config + existing_pr lookup +# (issue: enforce single-PR-per-program invariant) +# --------------------------------------------------------------------------- + +class TestSafeOutputsConfig: + """Verify the safe-outputs config that defends the single-PR invariant. + + Without `preserve-branch-name: true`, the gh-aw framework auto-suffixes + branch names on every run, breaking the single-long-running-branch model. + Without `max: 1` on both create-pull-request and push-to-pull-request-branch, + the agent could emit a create+create or create+push pair in the same iteration. + """ + + def _frontmatter(self): + import os + wf_path = os.path.join(os.path.dirname(__file__), "..", "workflows", "autoloop.md") + with open(wf_path) as f: + content = f.read() + # Frontmatter is the first --- ... --- block + m = re.match(r"^---\s*\n(.*?)\n---\s*\n", content, re.DOTALL) + assert m, "Could not find YAML frontmatter in workflows/autoloop.md" + return m.group(1) + + def test_create_pr_preserves_branch_name(self): + fm = self._frontmatter() + # create-pull-request block must contain preserve-branch-name: true + m = re.search(r"create-pull-request:\s*\n((?:\s{4}.*\n)+)", fm) + assert m, "Could not find create-pull-request block in safe-outputs" + block = m.group(1) + assert "preserve-branch-name: true" in block, ( + "create-pull-request must set 'preserve-branch-name: true' to keep " + "the canonical branch name autoloop/{program}; otherwise gh-aw " + "appends a hex salt and breaks the single-PR invariant.\n" + f"Block: {block}" + ) + + def test_create_pr_max_is_one(self): + fm = self._frontmatter() + m = re.search(r"create-pull-request:\s*\n((?:\s{4}.*\n)+)", fm) + assert m + block = m.group(1) + assert re.search(r"^\s*max:\s*1\s*$", block, re.MULTILINE), ( + "create-pull-request must set 'max: 1' β€” the invariant is one " + 
"safe-output of either create or push per iteration, never two.\n" + f"Block: {block}" + ) + + def test_push_to_pr_max_is_one(self): + fm = self._frontmatter() + m = re.search(r"push-to-pull-request-branch:\s*\n((?:\s{4}.*\n)+)", fm) + assert m, "Could not find push-to-pull-request-branch block in safe-outputs" + block = m.group(1) + assert re.search(r"^\s*max:\s*1\s*$", block, re.MULTILINE), ( + "push-to-pull-request-branch must set 'max: 1'.\n" + f"Block: {block}" + ) + + +class TestProseGuidance: + """Verify the prose guidance enforcing the single-PR invariant is present.""" + + def _content(self): + import os + wf_path = os.path.join(os.path.dirname(__file__), "..", "workflows", "autoloop.md") + with open(wf_path) as f: + return f.read() + + def test_branch_name_warning_present(self): + c = self._content() + assert "Branch Name Must Be Exact" in c, ( + "Missing the 'Branch Name Must Be Exact' warning that tells the " + "agent to never use suffixed branch names." + ) + assert "no suffixes" in c.lower(), "Warning should mention 'no suffixes'" + + def test_common_mistakes_section_present(self): + c = self._content() + assert "## Common Mistakes to Avoid" in c, ( + "Missing the 'Common Mistakes to Avoid' section." + ) + + def test_step5_uses_existing_pr(self): + c = self._content() + # Step 5 accept flow must reference existing_pr from autoloop.json + assert "existing_pr" in c, ( + "Workflow prose must instruct the agent to consult the " + "`existing_pr` field from /tmp/gh-aw/autoloop.json." + ) + assert "head_branch" in c, ( + "Workflow prose must instruct the agent to use the `head_branch` " + "field from /tmp/gh-aw/autoloop.json." 
+ ) + + +# --------------------------------------------------------------------------- +# find_existing_pr_for_branch helper β€” tolerant lookup of the open draft PR +# --------------------------------------------------------------------------- + + +def _run_find_existing_pr(program, mock_responses): + """Invoke ``find_existing_pr_for_branch`` with a stubbed HTTP client. + + ``mock_responses`` is a list of dicts: ``{ url_match, status, body, link }``. + The first entry whose ``url_match`` substring is contained in the requested + URL wins. The optional ``link`` field is returned as the Link response + header (used by pagination via ``parse_link_header``). ``status`` is kept + for parity with the previous JS-based stub but only the ``200`` path is + exercised β€” non-200 responses surface as ``(None, None)`` from the real + ``_http_get_json``, which the helper here mirrors when ``status != 200``. + """ + def stub(url, headers, timeout=30): + for r in mock_responses: + if r["url_match"] in url: + if r.get("status", 200) != 200: + return None, None + return r.get("body"), r.get("link") + return None, None + + return autoloop_scheduler.find_existing_pr_for_branch( + "owner/repo", program, "TOKEN", http_get_json=stub + ) + + +class TestFindExistingPRForBranch: + """The tolerant PR lookup that closes the single-PR-per-program invariant.""" + + def test_returns_null_when_no_pr_exists(self): + # Strategy 1 returns []; strategy 2 returns [] + responses = [ + {"url_match": "head=owner%3Aautoloop%2Fcoverage", "status": 200, "body": []}, + {"url_match": "/pulls?state=open", "status": 200, "body": []}, + ] + assert _run_find_existing_pr("coverage", responses) is None + + def test_finds_pr_with_canonical_branch_name(self): + responses = [ + { + "url_match": "head=owner%3Aautoloop%2Fcoverage", + "status": 200, + "body": [{"number": 42, "head": {"ref": "autoloop/coverage"}, "title": "[Autoloop] x"}], + }, + ] + assert _run_find_existing_pr("coverage", responses) == 42 + + def 
test_finds_pr_with_legacy_hex_suffix(self): + # Strategy 1 finds nothing (the open PR has a suffixed branch name); + # Strategy 2 falls back to listing all open PRs and matches the suffix regex. + responses = [ + {"url_match": "head=owner%3Aautoloop%2Fcoverage", "status": 200, "body": []}, + { + "url_match": "/pulls?state=open", + "status": 200, + "body": [ + {"number": 99, "head": {"ref": "autoloop/coverage-8724e9f9"}, "title": "[Autoloop] x"}, + ], + }, + ] + assert _run_find_existing_pr("coverage", responses) == 99 + + def test_finds_pr_via_title_prefix_fallback(self): + # Branch name doesn't match suffix pattern, but title prefix does + responses = [ + {"url_match": "head=owner%3Aautoloop%2Fcoverage", "status": 200, "body": []}, + { + "url_match": "/pulls?state=open", + "status": 200, + "body": [ + {"number": 7, "head": {"ref": "totally-different-branch"}, "title": "[Autoloop: coverage] iter 3"}, + ], + }, + ] + assert _run_find_existing_pr("coverage", responses) == 7 + + def test_does_not_match_unrelated_program(self): + # autoloop/coverage-extras is a different program, not a hex suffix + responses = [ + {"url_match": "head=owner%3Aautoloop%2Fcoverage", "status": 200, "body": []}, + { + "url_match": "/pulls?state=open", + "status": 200, + "body": [ + {"number": 11, "head": {"ref": "autoloop/coverage-extras"}, "title": "[Autoloop] other"}, + ], + }, + ] + assert _run_find_existing_pr("coverage", responses) is None + + def test_does_not_match_other_program_with_similar_name(self): + # Program name with regex-special-ish characters (underscore is fine, but + # we want to make sure the regex is properly anchored to ^...$). 
+ responses = [ + {"url_match": "head=owner%3Aautoloop%2Fsignal_processing", "status": 200, "body": []}, + { + "url_match": "/pulls?state=open", + "status": 200, + "body": [ + # Branch for a different program that happens to share a prefix + {"number": 5, "head": {"ref": "autoloop/signal"}, "title": "[Autoloop] other"}, + ], + }, + ] + assert _run_find_existing_pr("signal_processing", responses) is None diff --git a/workflows/autoloop.md b/workflows/autoloop.md index f9f4a4c..5f14579 100644 --- a/workflows/autoloop.md +++ b/workflows/autoloop.md @@ -44,11 +44,12 @@ safe-outputs: title-prefix: "[Autoloop] " labels: [automation, autoloop] protected-files: fallback-to-issue - max: 2 + preserve-branch-name: true + max: 1 push-to-pull-request-branch: target: "*" title-prefix: "[Autoloop] " - max: 2 + max: 1 create-issue: title-prefix: "[Autoloop] " labels: [automation, autoloop] @@ -114,484 +115,7 @@ steps: GITHUB_REPOSITORY: ${{ github.repository }} AUTOLOOP_PROGRAM: ${{ github.event.inputs.program }} run: | - node - << 'JSEOF' - const fs = require('fs'); - const path = require('path'); - - const programsDir = '.autoloop/programs'; - const autoloopDir = '.autoloop/programs'; - const templateFile = path.join(autoloopDir, 'example.md'); - - // Read program state from repo-memory (persistent git-backed storage) - const githubToken = process.env.GITHUB_TOKEN || ''; - const repo = process.env.GITHUB_REPOSITORY || ''; - const forcedProgram = (process.env.AUTOLOOP_PROGRAM || '').trim(); - - // Repo-memory files are cloned to /tmp/gh-aw/repo-memory/{id}/ where {id} - // is derived from the branch-name configured in the tools section (memory/autoloop -> autoloop) - const repoMemoryDir = '/tmp/gh-aw/repo-memory/autoloop'; - - function parseMachineState(content) { - const state = {}; - const sectionMatch = content.match(/## βš™οΈ Machine State[^\n]*\n([\s\S]*?)(?=\n## |$)/); - if (!sectionMatch) return state; - const section = sectionMatch[0]; - const rowRegex = 
/\|\s*(.+?)\s*\|\s*(.+?)\s*\|/g; - let row; - while ((row = rowRegex.exec(section)) !== null) { - const rawKey = row[1].trim(); - const rawVal = row[2].trim(); - if (['field', '---', ':---', ':---:', '---:'].includes(rawKey.toLowerCase())) continue; - const key = rawKey.toLowerCase().replace(/ /g, '_'); - const val = ['\u2014', '-', ''].includes(rawVal) ? null : rawVal; // \u2014 = em dash - state[key] = val; - } - // Coerce types - for (const intField of ['iteration_count', 'consecutive_errors']) { - if (intField in state) { - const n = parseInt(state[intField], 10); - state[intField] = isNaN(n) ? 0 : n; - } - } - if ('paused' in state) { - state.paused = String(state.paused || '').toLowerCase() === 'true'; - } - if ('completed' in state) { - state.completed = String(state.completed || '').toLowerCase() === 'true'; - } - // recent_statuses: stored as comma-separated words (e.g. "accepted, rejected, error") - const rsRaw = state.recent_statuses || ''; - if (rsRaw) { - state.recent_statuses = rsRaw.split(',').map(s => s.trim().toLowerCase()).filter(s => s); - } else { - state.recent_statuses = []; - } - return state; - } - - function readProgramState(programName) { - const stateFile = path.join(repoMemoryDir, programName + '.md'); - try { - if (!fs.statSync(stateFile).isFile()) { - console.log(' ' + programName + ': no state file found (first run)'); - return {}; - } - } catch (e) { - console.log(' ' + programName + ': no state file found (first run)'); - return {}; - } - const content = fs.readFileSync(stateFile, 'utf-8'); - return parseMachineState(content); - } - - function getStateFileSize(programName) { - // Returns the size of the program's state file in bytes, or 0 if it - // does not exist. Surfaced in autoloop.json so the agent can decide - // whether to compact aggressively this iteration. - const stateFile = path.join(repoMemoryDir, programName + '.md'); - try { - const st = fs.statSync(stateFile); - return st.isFile() ? 
st.size : 0; - } catch (e) { - return 0; - } - } - - // Schedule string to milliseconds - function parseSchedule(s) { - s = s.trim().toLowerCase(); - let m = s.match(/^every\s+(\d+)\s*h/); - if (m) return parseInt(m[1], 10) * 3600 * 1000; - m = s.match(/^every\s+(\d+)\s*m/); - if (m) return parseInt(m[1], 10) * 60 * 1000; - if (s === 'daily') return 24 * 3600 * 1000; - if (s === 'weekly') return 7 * 24 * 3600 * 1000; - return null; - } - - function getProgramName(pf) { - // Extract program name from file path. - // Directory-based: .autoloop/programs//program.md -> - // Bare markdown: .autoloop/programs/.md -> - // Issue-based: /tmp/gh-aw/issue-programs/.md -> - if (pf.endsWith('/program.md')) { - return path.basename(path.dirname(pf)); - } else { - return path.parse(pf).name; - } - } - - // Parse the GitHub API Link header to extract the "next" page URL. - // Returns the URL string for the next page, or null if there is none. - function parseLinkHeader(header) { - if (!header) return null; - var parts = header.split(','); - for (var i = 0; i < parts.length; i++) { - var section = parts[i].trim(); - var m = section.match(/^<([^>]+)>;\s*rel="next"$/); - if (m) return m[1]; - } - return null; - } - - // Main execution - async function main() { - // Bootstrap: create autoloop programs directory and template if missing - if (!fs.existsSync(autoloopDir)) { - fs.mkdirSync(autoloopDir, { recursive: true }); - const bt = String.fromCharCode(96); // backtick -- avoid literal backticks that break gh-aw compiler - const template = [ - '', - '', - '', - '', - '# Autoloop Program', - '', - '', - '', - '## Goal', - '', - "", - '', - 'REPLACE THIS with your optimization goal.', - '', - '## Target', - '', - '', - '', - 'Only modify these files:', - '- ' + bt + 'REPLACE_WITH_FILE' + bt + ' -- (describe what this file does)', - '', - 'Do NOT modify:', - '- (list files that must not be touched)', - '', - '## Evaluation', - '', - '', - '', - bt + bt + bt + 'bash', - 
'REPLACE_WITH_YOUR_EVALUATION_COMMAND', - bt + bt + bt, - '', - 'The metric is ' + bt + 'REPLACE_WITH_METRIC_NAME' + bt + '. **Lower/Higher is better.** (pick one)', - '', - ].join('\n'); - fs.writeFileSync(templateFile, template); - console.log('BOOTSTRAPPED: created ' + templateFile + ' locally (agent will create a draft PR)'); - } - - // Find all program files from all locations: - // 1. Directory-based programs: .autoloop/programs//program.md (preferred) - // 2. Bare markdown programs: .autoloop/programs/.md (simple) - // 3. Issue-based programs: GitHub issues with the 'autoloop-program' label - let programFiles = []; - const issuePrograms = {}; - - // Scan .autoloop/programs/ for directory-based programs - if (fs.existsSync(programsDir)) { - try { - if (fs.statSync(programsDir).isDirectory()) { - const entries = fs.readdirSync(programsDir).sort(); - for (const entry of entries) { - const progDir = path.join(programsDir, entry); - try { - if (fs.statSync(progDir).isDirectory()) { - const progFile = path.join(progDir, 'program.md'); - try { - if (fs.statSync(progFile).isFile()) { - programFiles.push(progFile); - } - } catch (e) { /* file doesn't exist */ } - } - } catch (e) { /* stat failed */ } - } - } - } catch (e) { /* stat failed */ } - } - - // Scan .autoloop/programs/ for bare markdown programs - if (fs.existsSync(autoloopDir)) { - try { - if (fs.statSync(autoloopDir).isDirectory()) { - const barePrograms = fs.readdirSync(autoloopDir) - .filter(f => f.endsWith('.md')) - .sort() - .map(f => path.join(autoloopDir, f)); - for (const pf of barePrograms) { - programFiles.push(pf); - } - } - } catch (e) { /* stat failed */ } - } - - // Scan GitHub issues with the 'autoloop-program' label (paginated) - const issueProgramsDir = '/tmp/gh-aw/issue-programs'; - fs.mkdirSync(issueProgramsDir, { recursive: true }); - try { - let nextUrl = 'https://api.github.com/repos/' + repo + '/issues?labels=autoloop-program&state=open&per_page=100'; - const issues = []; - while 
(nextUrl) { - const response = await fetch(nextUrl, { - headers: { - 'Authorization': 'token ' + githubToken, - 'Accept': 'application/vnd.github.v3+json', - }, - }); - const page = await response.json(); - issues.push(...page); - nextUrl = parseLinkHeader(response.headers.get('link')); - } - for (const issue of issues) { - if (issue.pull_request) continue; // skip PRs - const body = issue.body || ''; - const title = issue.title || ''; - const number = issue.number; - // Derive program name from issue title: slugify to lowercase with hyphens - let slug = title.toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-+|-+$/g, ''); - slug = slug.replace(/-+/g, '-'); // collapse consecutive hyphens - if (!slug) slug = 'issue-' + number; - // Avoid slug collisions: if another issue already claimed this slug, append issue number - if (slug in issuePrograms) { - console.log(" Warning: slug '" + slug + "' (issue #" + number + ") collides with issue #" + issuePrograms[slug].issue_number + ", appending issue number"); - slug = slug + '-' + number; - } - // Write issue body to a temp file so the scheduling loop can process it - const issueFile = path.join(issueProgramsDir, slug + '.md'); - fs.writeFileSync(issueFile, body); - programFiles.push(issueFile); - issuePrograms[slug] = { issue_number: number, file: issueFile, title: title }; - console.log(" Found issue-based program: '" + slug + "' (issue #" + number + ")"); - } - } catch (e) { - console.log(' Warning: could not fetch issue-based programs: ' + e.message); - } - - if (programFiles.length === 0) { - // Fallback to single-file locations - for (const p of ['.autoloop/program.md', 'program.md']) { - try { - if (fs.statSync(p).isFile()) { - programFiles = [p]; - break; - } - } catch (e) { /* file doesn't exist */ } - } - } - - if (programFiles.length === 0) { - console.log('NO_PROGRAMS_FOUND'); - fs.mkdirSync('/tmp/gh-aw', { recursive: true }); - fs.writeFileSync('/tmp/gh-aw/autoloop.json', JSON.stringify( - { due: [], 
skipped: [], unconfigured: [], no_programs: true } - )); - process.exit(0); - } - - fs.mkdirSync('/tmp/gh-aw', { recursive: true }); - const now = new Date(); - const due = []; - const skipped = []; - const unconfigured = []; - const allPrograms = {}; - - for (const pf of programFiles) { - const name = getProgramName(pf); - allPrograms[name] = pf; - const content = fs.readFileSync(pf, 'utf-8'); - - // Check sentinel (skip for issue-based programs which use AUTOLOOP:ISSUE-PROGRAM) - if (content.includes('')) { - unconfigured.push(name); - continue; - } - - // Check for TODO/REPLACE placeholders - if (/\bTODO\b|\bREPLACE/.test(content)) { - unconfigured.push(name); - continue; - } - - // Parse optional YAML frontmatter for schedule and target-metric - // Strip leading HTML comments before checking (issue-based programs may have them) - const contentStripped = content.replace(/^(\s*\s*\n)*/, ''); - let scheduleDelta = null; - let targetMetric = null; - const fmMatch = contentStripped.match(/^---\s*\n([\s\S]*?)\n---\s*\n/); - if (fmMatch) { - for (const line of fmMatch[1].split('\n')) { - if (line.trim().startsWith('schedule:')) { - const scheduleStr = line.substring(line.indexOf(':') + 1).trim(); - scheduleDelta = parseSchedule(scheduleStr); - } - if (line.trim().startsWith('target-metric:')) { - const val = parseFloat(line.substring(line.indexOf(':') + 1).trim()); - if (!isNaN(val)) { - targetMetric = val; - } else { - console.log(' Warning: ' + name + ' has invalid target-metric value: ' + line.substring(line.indexOf(':') + 1).trim()); - } - } - } - } - - // Read state from repo-memory - const state = readProgramState(name); - if (state && Object.keys(state).length > 0) { - console.log(' ' + name + ': last_run=' + (state.last_run || null) + ', iteration_count=' + (state.iteration_count != null ? 
state.iteration_count : null)); - } else { - console.log(' ' + name + ': no state found (first run)'); - } - - let lastRun = null; - const lr = state.last_run || null; - if (lr) { - try { - const d = new Date(lr); - if (!isNaN(d.getTime())) lastRun = d; - } catch (e) { - // ignore invalid date - } - } - - // Check if completed (target metric was reached) - if (String(state.completed || '').toLowerCase() === 'true') { - skipped.push({ name: name, reason: 'completed: target metric reached' }); - continue; - } - - // Check if paused (e.g., plateau or recurring errors) - if (state.paused) { - skipped.push({ name: name, reason: 'paused: ' + (state.pause_reason || 'unknown') }); - continue; - } - - // Auto-pause on plateau: 5+ consecutive rejections - const recent = (state.recent_statuses || []).slice(-5); - if (recent.length >= 5 && recent.every(s => s === 'rejected')) { - skipped.push({ name: name, reason: 'plateau: 5 consecutive rejections' }); - continue; - } - - // Check if due based on per-program schedule - if (scheduleDelta && lastRun) { - if (now.getTime() - lastRun.getTime() < scheduleDelta) { - skipped.push({ - name: name, - reason: 'not due yet', - next_due: new Date(lastRun.getTime() + scheduleDelta).toISOString(), - }); - continue; - } - } - - due.push({ name: name, last_run: lr, file: pf, target_metric: targetMetric }); - } - - // Pick the program to run - let selected = null; - let selectedFile = null; - let selectedIssue = null; - let selectedTargetMetric = null; - let deferred = []; - - if (forcedProgram) { - // Manual dispatch requested a specific program -- bypass scheduling - // (paused, not-due, and plateau programs can still be forced) - if (!(forcedProgram in allPrograms)) { - console.log("ERROR: requested program '" + forcedProgram + "' not found."); - console.log(' Available programs: ' + JSON.stringify(Object.keys(allPrograms))); - process.exit(1); - } - if (unconfigured.includes(forcedProgram)) { - console.log("ERROR: requested program '" + 
forcedProgram + "' is unconfigured (has placeholders)."); - process.exit(1); - } - selected = forcedProgram; - selectedFile = allPrograms[forcedProgram]; - deferred = due.filter(p => p.name !== forcedProgram).map(p => p.name); - if (selected in issuePrograms) { - selectedIssue = issuePrograms[selected].issue_number; - } - // Find target_metric: check the due list first, then parse from the program file - for (const p of due) { - if (p.name === forcedProgram) { - selectedTargetMetric = p.target_metric || null; - break; - } - } - if (selectedTargetMetric === null) { - // Program may have been skipped (completed/paused/plateau) -- parse directly - try { - const _content = fs.readFileSync(selectedFile, 'utf-8'); - const _contentStripped = _content.replace(/^(\s*\s*\n)*/, ''); - const _fm = _contentStripped.match(/^---\s*\n([\s\S]*?)\n---\s*\n/); - if (_fm) { - for (const _line of _fm[1].split('\n')) { - if (_line.trim().startsWith('target-metric:')) { - const val = parseFloat(_line.substring(_line.indexOf(':') + 1).trim()); - if (!isNaN(val)) { - selectedTargetMetric = val; - break; - } - } - } - } - } catch (e) { /* ignore */ } - } - console.log("FORCED: running program '" + forcedProgram + "' (manual dispatch)"); - } else if (due.length > 0) { - // Normal scheduling: pick the single most-overdue program - due.sort((a, b) => (a.last_run || '').localeCompare(b.last_run || '')); // null/empty sorts first (never run) - selected = due[0].name; - selectedFile = due[0].file; - selectedTargetMetric = due[0].target_metric || null; - deferred = due.slice(1).map(p => p.name); - // Check if the selected program is issue-based - if (selected in issuePrograms) { - selectedIssue = issuePrograms[selected].issue_number; - } - } - - const issueProgramsMap = {}; - for (const [name, info] of Object.entries(issuePrograms)) { - issueProgramsMap[name] = info.issue_number; - } - - const notDue = !selected && unconfigured.length === 0; - const result = { - selected: selected, - 
selected_file: selectedFile, - selected_issue: selectedIssue, - selected_target_metric: selectedTargetMetric, - state_file_size_bytes: selected ? getStateFileSize(selected) : 0, - state_file_max_bytes: 30720, - issue_programs: issueProgramsMap, - deferred: deferred, - skipped: skipped, - unconfigured: unconfigured, - no_programs: false, - not_due: notDue, - }; - - fs.mkdirSync('/tmp/gh-aw', { recursive: true }); - fs.writeFileSync('/tmp/gh-aw/autoloop.json', JSON.stringify(result, null, 2)); - - console.log('=== Autoloop Program Check ==='); - console.log('Selected program: ' + (selected || '(none)') + ' (' + (selectedFile || 'n/a') + ')'); - console.log('Deferred (next run): ' + (deferred.length > 0 ? JSON.stringify(deferred) : '(none)')); - console.log('Programs skipped: ' + (skipped.length > 0 ? JSON.stringify(skipped.map(s => s.name)) : '(none)')); - console.log('Programs unconfigured: ' + (unconfigured.length > 0 ? JSON.stringify(unconfigured) : '(none)')); - - if (!selected && unconfigured.length === 0) { - console.log('\nNo programs due this run. Exiting early.'); - process.exit(0); - } - } - - main().catch(err => { console.error(err.message || err); process.exit(1); }); - JSEOF + python3 .github/workflows/scripts/autoloop_scheduler.py source: githubnext/autoloop engine: copilot @@ -678,6 +202,8 @@ The pre-step has already determined which program to run. Read `/tmp/gh-aw/autol - **`skipped`**: Programs not due yet based on their per-program schedule. - **`no_programs`**: If `true`, no program files exist at all. - **`not_due`**: If `true`, programs exist but none are due for this run. +- **`head_branch`**: The canonical long-running branch name for the selected program β€” always exactly `autoloop/{program-name}`, never with a suffix or hash. Use this value verbatim when creating, checking out, or pushing to the branch. +- **`existing_pr`**: The number of the open draft PR for `autoloop/{program-name}`, or `null` if no PR exists yet. 
Use this to enforce the single-PR-per-program invariant β€” see [Step 5a: Push and wait for CI](#step-5a-push-and-wait-for-ci) and [Step 5c: Accept](#step-5c-accept). If `selected` is not null: 1. Read the program file from the `selected_file` path. @@ -709,7 +235,7 @@ GitHub Issues (labeled 'autoloop-program'): Each program runs independently with its own: - Goal, target files, and evaluation command - Metric tracking and best-metric history -- Steering issue: `[Autoloop: {program-name}] Steering` (persistent, links branch/PR/state) +- Program issue: `[Autoloop: {program-name}]` (a single GitHub issue labeled `autoloop-program` β€” created automatically for file-based programs, the source issue for issue-based programs β€” that hosts the status comment, per-iteration comments, and human steering) - Long-running branch: `autoloop/{program-name}` (persists across iterations) - Single draft PR per program: `[Autoloop: {program-name}]` (accumulates all accepted iterations) - State file: `{program-name}.md` in repo-memory (all state: scheduling, research context, iteration history) @@ -781,6 +307,16 @@ Examples: - `autoloop/signal_processing` - `autoloop/coverage` +> ⚠️ **CRITICAL β€” Branch Name Must Be Exact** +> +> The branch name is ALWAYS exactly `autoloop/{program-name}` β€” **no suffixes, no hashes, no run IDs, no iteration numbers, no random tokens**. Never create branches like: +> - ❌ `autoloop/coverage-abc123` +> - ❌ `autoloop/coverage-iter42-deadbeef` +> - ❌ `autoloop/coverage-1234567890` +> +> **Never let the gh-aw framework auto-generate a branch name.** You must explicitly name the branch when creating it. The pre-step provides the canonical name in the `head_branch` field of `/tmp/gh-aw/autoloop.json` β€” always use that value verbatim. + + ### How It Works 1. On the **first accepted iteration**, the branch is created from the default branch. 
@@ -795,10 +331,10 @@ Examples: Each program has three coordinated resources: - **Branch + PR**: `autoloop/{program-name}` with a single draft PR -- **Steering Issue**: `[Autoloop: {program-name}] Steering` β€” persistent GitHub issue linking branch, PR, and state +- **Program Issue**: `[Autoloop: {program-name}]` β€” a single GitHub issue (labeled `autoloop-program`) that hosts the status comment, per-iteration comments, and human steering. For issue-based programs this is the source issue. For file-based programs it is auto-created on the first run. - **State File**: `{program-name}.md` in repo-memory β€” all state, history, and research context -All three reference each other. The steering issue is created on the first accepted iteration and updated with links to the PR and state. +All three reference each other. The program issue is created (or, for issue-based programs, adopted) on the first run and updated with links to the PR and state. ## Iteration Loop @@ -831,10 +367,53 @@ Each run executes **one iteration for the single selected program**: ### Step 3: Implement -1. Check out the program's long-running branch `autoloop/{program-name}`. If the branch does not yet exist, create it from the default branch. If it does exist: - - Fetch the default branch: `git fetch origin main`. - - Check whether the branch's changes have already been merged into main. If `git diff origin/main..autoloop/{program-name}` produces no output (i.e., every change on the branch is already on main), the branch is stale β€” **reset it to `origin/main`**: `git reset --hard origin/main`. - - Otherwise, merge the default branch into the long-running branch to pick up any upstream changes. +1. Check out the program's long-running branch `autoloop/{program-name}`, syncing it with the default branch using an explicit four-case decision tree based on commit ahead/behind counts. 
Run the following script (substituting `{program-name}`): + + ```bash + git fetch origin main + if git ls-remote --exit-code origin autoloop/{program-name}; then + # Branch exists β€” fetch it too so the ahead/behind counts below are + # computed against up-to-date local copies of the remote tips. + git fetch origin autoloop/{program-name} + + ahead=$(git rev-list --count origin/main..origin/autoloop/{program-name}) + behind=$(git rev-list --count origin/autoloop/{program-name}..origin/main) + + if [ "$ahead" = "0" ] && [ "$behind" != "0" ]; then + # All of the branch's commits are already in main (typical case after a + # successful merge of the previous iteration's PR). A merge here would + # produce a noisy "Merge main into branch" commit that re-exposes every + # historical file as a patch touch β€” the failure mode that triggers + # gh-aw's E003 (>100 files) when a new PR is opened. Fast-forward the + # canonical branch to main instead. This is lossless because ahead=0 + # proves every commit on the branch is already reachable from main. + git checkout -B autoloop/{program-name} origin/main + git push --force-with-lease origin autoloop/{program-name} + elif [ "$ahead" != "0" ] && [ "$behind" != "0" ]; then + # True divergence: branch has unique commits AND main has moved on. + git checkout -B autoloop/{program-name} origin/autoloop/{program-name} + git merge origin/main --no-edit -m "Merge main into autoloop/{program-name}" + else + # Already at main (ahead=0, behind=0) or only ahead of main (ahead>0, + # behind=0). Nothing to merge β€” just check out the branch. 
+ git checkout -B autoloop/{program-name} origin/autoloop/{program-name} + fi + else + # Branch does not exist β€” create it from the default branch + git checkout -b autoloop/{program-name} origin/main + fi + ``` + + The four cases: + + | ahead | behind | Action | Rationale | + |---|---|---|---| + | 0 | 0 | checkout (nothing to do) | branch is exactly at main | + | 0 | >0 | **fast-forward + force-push** | branch's commits already in main; merging would produce noisy merge commit | + | >0 | 0 | checkout (nothing to do) | unique work preserved; no upstream drift to merge | + | >0 | >0 | checkout + merge | true divergence | + + Use `--force-with-lease` rather than `--force` so that if anyone else is simultaneously pushing to the branch, the update is rejected rather than overwriting their commits. 2. Make the proposed changes to the target files only. 3. **Respect the program constraints**: do not modify files outside the target list. @@ -846,24 +425,79 @@ Each run executes **one iteration for the single selected program**: ### Step 5: Accept or Reject -**If the metric improved** (or this is the first run establishing a baseline): +The sandbox-computed metric is necessary but **not sufficient** for acceptance. The agent's sandbox cannot reliably install many project toolchains (e.g., `bun`, `tsc`, `cargo`, `go`, `pytest`) due to network restrictions on asset hosts, so a "metric improved" signal from the sandbox can mask broken commits (e.g., type-check or test failures the sandbox couldn't observe). Acceptance must therefore be gated on **CI green** for the pushed HEAD commit. If CI fails, attempt to fix-and-retry within the same iteration rather than reverting β€” reverting throws away mostly-correct work and creates `commitβ†’revertβ†’commit` churn on the branch. + +The accept path is split into three sub-steps: **5a (push and wait for CI)**, **5b (fix loop)**, **5c (accept)**. 
+ +**If the metric did not improve**, jump straight to the "metric did not improve" path below β€” no push, no CI gate. + +#### Step 5a: Push and wait for CI + +**Only entered if the metric improved** (or this is the first run establishing a baseline). + 1. Commit the changes to the long-running branch `autoloop/{program-name}` with a commit message referencing the actions run: - Commit message subject line: `[Autoloop: {program-name}] Iteration : ` - Commit message body (after a blank line): `Run: {run_url}` referencing the GitHub Actions run URL. 2. Push the commit to the long-running branch. -3. If a draft PR does not already exist for this branch, create one: +3. **Find or create the PR** so CI runs and `gh pr checks` has a target. Follow these steps in order: + a. Check `existing_pr` from `/tmp/gh-aw/autoloop.json`. If it is not null, that is the existing draft PR β€” use it as `$EXISTING_PR` below; **never** call `create-pull-request`. + b. If `existing_pr` is null, also check the `PR` field in the state file's **βš™οΈ Machine State** table as a fallback. Verify it is still open via the GitHub API; if it has been closed or merged, treat it as if no PR exists and proceed to step (c). + c. If no PR exists (both sources are null): create one with `create-pull-request`, specifying `branch: autoloop/{program-name}` (the value of `head_branch` from `autoloop.json`) explicitly β€” do not let the framework auto-generate a branch name. See Step 5c for the title/body format. +4. 
Wait for CI on the new HEAD and reduce all check-runs to a single status — `success`, `failure`, or `pending`: + + ```bash + PR=${EXISTING_PR:-$(gh pr list --head autoloop/{program-name} --json number -q '.[0].number')} + gh pr checks "$PR" --watch --interval 30 || true + status=$(gh pr checks "$PR" --json state -q '.[].state' \ + | awk ' + BEGIN { r = "success" } + /^(FAILURE|ERROR|CANCELLED|TIMED_OUT|ACTION_REQUIRED|STARTUP_FAILURE|STALE)$/ { r = "failure" } + /^(PENDING|EXPECTED|QUEUED|IN_PROGRESS|WAITING|REQUESTED)$/ { if (r == "success") r = "pending" } + END { print r }') + ``` + + Three outcomes: `success`, `failure`, or `pending`. `pending` should be rare given `--watch`, but the awk fallback is defensive — never accept on `pending`. Treat `pending` as a non-terminal state: re-run the `gh pr checks --watch` step (it does not consume a fix attempt and the per-attempt `--watch` time still counts toward the 60-min wall-clock cap from Step 5b). If `pending` persists past the wall-clock cap, fall through to the `ci-timeout` handling in Step 5b.7. + +5. If `status == "success"`, proceed to **Step 5c**. If `status == "failure"`, proceed to **Step 5b**. If `status == "pending"`, re-run this step (subject to the wall-clock cap defined in Step 5b.7). + +#### Step 5b: Fix loop (up to 5 attempts per iteration) + +If `status == "failure"`, **fix and retry — do not revert, do not accept**: + +1. **Fetch the failing check-run logs** for the pushed SHA via `gh run view --log` or the Checks API. +2. **Extract a structured failure summary**: + - Failing job names and the first error line for each. + - **A failure signature** — a stable, normalized fingerprint of the failures (e.g., sorted failing-test names + the top error code, like `TS2339:fromArrays:tests/stats/eval_query.test.ts`). The signature is what the no-progress guard compares.
+ + *(The shared failure-signature extractor lives in the scheduler helper module β€” see issue #34 for the implementation.)* +3. **No-progress guard**: if this attempt's failure signature exactly matches the previous attempt's signature, **stop**. The agent is stuck in a repeat-loop. Set `paused: true` on the state file with `pause_reason: "stuck in CI fix loop: "`, append `"ci-fix-exhausted"` to `recent_statuses`, comment on the program issue with the signature and the three most recent attempts, and end the iteration. +4. **Attempt the fix**: feed the structured failure summary back to the agent as the next sub-task (e.g., "CI failed on ``. Here are the failures: `<…>`. Fix them and push again."). The agent commits the fix and pushes. +5. **Loop back to Step 5a** with the new HEAD. +6. **Budget: 5 fix attempts per iteration.** If the 5th attempt still leaves CI red, set `paused: true` with `pause_reason: "ci-fix-exhausted: "`, append `"ci-fix-exhausted"` to `recent_statuses`, comment on the program issue, and end the iteration. +7. **Wall-clock cap: 60 min per iteration** including all CI waits across attempts. If exceeded mid-fix, set `paused: true` with `pause_reason: "ci-timeout"`, append `"ci-fix-exhausted"` to `recent_statuses`, leave the current branch state in place, and end the iteration. + +#### Step 5c: Accept + +**Only entered when `status == "success"`** from Step 5a (possibly after one or more fix attempts in Step 5b). + +1. The commit(s) are already on the long-running branch (pushed in Step 5a / 5b). No further pushing needed. +2. 
If a draft PR does not already exist for this branch (i.e., `existing_pr` from `autoloop.json` is null AND the state file's `PR` field is null or refers to a closed PR), create one β€” specify `branch: autoloop/{program-name}` (the value of `head_branch` from `autoloop.json`) explicitly so the framework does not auto-generate a branch name: - Title: `[Autoloop: {program-name}]` - - Body includes: a summary of the program goal, link to the steering issue, the current best metric, and AI disclosure: `πŸ€– *This PR is maintained by Autoloop. Each accepted iteration adds a commit to this branch.*` - If a draft PR already exists, update the PR body with the latest metric and a summary of the most recent accepted iteration. Add a comment to the PR summarizing the iteration: what changed, old metric, new metric, improvement delta, and a link to the actions run. -4. Ensure the steering issue exists (see [Steering Issue](#steering-issue) below). Add a comment to the steering issue linking to the commit and actions run. + - Body includes: a summary of the program goal, link to the program issue, the current best metric, and AI disclosure: `πŸ€– *This PR is maintained by Autoloop. Each accepted iteration adds a commit to this branch.*` + If a draft PR already exists, use `push-to-pull-request-branch` (never `create-pull-request`). Update the PR body with the latest metric and a summary of the most recent accepted iteration. Add a comment to the PR summarizing the iteration: what changed, old metric, new metric, improvement delta, the **fix-attempt count** if `> 0`, and a link to the actions run. +4. Ensure the program issue exists (see [Program Issue](#program-issue) below) β€” for file-based programs that have no program issue yet (`selected_issue` is null in `/tmp/gh-aw/autoloop.json`), create one and record its number in the state file's `Issue` field. 5. 
Update the state file `{program-name}.md` in the repo-memory folder: - Update the **βš™οΈ Machine State** table: reset `consecutive_errors` to 0, set `best_metric`, increment `iteration_count`, set `last_run` to current UTC timestamp, append `"accepted"` to `recent_statuses` (keep last 10), set `paused` to false. - - Prepend an entry to **πŸ“Š Iteration History** (newest first) with status βœ…, metric, PR link, and a one-line summary of what changed and why it worked. + - Prepend an entry to **πŸ“Š Iteration History** (newest first) with status βœ…, metric, PR link, the fix-attempt count if `> 0`, and a one-line summary of what changed and why it worked. - Update **πŸ“š Lessons Learned** if this iteration revealed something new about the problem or what works. - Update **πŸ”­ Future Directions** if this iteration opened new promising paths. -6. **If this is an issue-based program** (`selected_issue` is not null): update the status comment and post a per-run comment on the source issue (see [Issue-Based Program Updates](#issue-based-program-updates)). +6. **Update the program issue**: edit the status comment and post a per-iteration comment on the program issue (see [Program Issue](#program-issue)). Note the fix-attempt count in the per-iteration comment if `> 0`. 7. **Check halting condition** (see [Halting Condition](#halting-condition)): If the program has a `target-metric` in its frontmatter and the new `best_metric` meets or surpasses the target, mark the program as completed. +#### Coordination with PR-health-keeper workflows + +If a repo ships a companion PR-health-keeper workflow (e.g., an "Evergreen" workflow that fixes failing CI on open PRs), it should be able to pick up paused Autoloop PRs using the same rules as human-authored PRs. The handoff is via the `pause_reason` field β€” `ci-fix-exhausted: `, `stuck in CI fix loop: `, and `ci-timeout` are all signals that the branch is red and needs an external nudge. 
Absent such a workflow, the loud pause + structured reason gives a human enough signal to intervene. + **If the metric did not improve**: 1. Discard the code changes (do not commit them to the long-running branch). 2. Update the state file `{program-name}.md` in the repo-memory folder: @@ -871,7 +505,7 @@ Each run executes **one iteration for the single selected program**: - Prepend an entry to **πŸ“Š Iteration History** with status ❌, metric, and a one-line summary of what was tried. - If this approach is conclusively ruled out (e.g., tried multiple variations and all fail), add it to **🚧 Foreclosed Avenues** with a clear explanation. - Update **πŸ”­ Future Directions** if this rejection clarified what to try next. -3. **If this is an issue-based program** (`selected_issue` is not null): update the status comment and post a per-run comment on the source issue (see [Issue-Based Program Updates](#issue-based-program-updates)). +3. **Update the program issue**: edit the status comment and post a per-iteration comment on the program issue (see [Program Issue](#program-issue)). **If evaluation could not run** (build failure, missing dependencies, etc.): 1. Discard the code changes (do not commit them to the long-running branch). @@ -879,50 +513,33 @@ Each run executes **one iteration for the single selected program**: - Update the **βš™οΈ Machine State** table: increment `consecutive_errors`, increment `iteration_count`, set `last_run`, append `"error"` to `recent_statuses` (keep last 10). - If `consecutive_errors` reaches 3+, set `paused` to `true` and set `pause_reason` in the Machine State table, and create an issue describing the problem. - Prepend an entry to **πŸ“Š Iteration History** with status ⚠️ and a brief error description. -3. **If this is an issue-based program** (`selected_issue` is not null): update the status comment and post a per-run comment on the source issue (see [Issue-Based Program Updates](#issue-based-program-updates)). 
- -## Steering Issue - -Maintain a single **persistent** open issue per program titled `[Autoloop: {program-name}] Steering`. The steering issue lives for the entire lifetime of the program. +3. **Update the program issue**: edit the status comment and post a per-iteration comment on the program issue (see [Program Issue](#program-issue)). -The steering issue serves as the central coordination point linking together the program's key resources: -- The **long-running branch** `autoloop/{program-name}` and its draft PR -- The **state file** `{program-name}.md` in repo-memory (on the `memory/autoloop` branch) +## Program Issue -### Steering Issue Body Format +Each program has **exactly one** open GitHub issue (labeled `autoloop-program`) titled `[Autoloop: {program-name}]`. This single issue is the source of truth for the program β€” it hosts: -```markdown -πŸ€– *Autoloop β€” steering issue for the `{program-name}` program.* - -## Links +- The **status comment** (the earliest bot comment, edited in place each iteration) β€” a dashboard of current state. +- A **per-iteration comment** for every iteration (accepted, rejected, or error) β€” the rolling log. +- **Human steering comments** β€” plain-prose comments from maintainers, treated by the agent as directives. -- **Branch**: [`autoloop/{program-name}`](https://github.com/{owner}/{repo}/tree/autoloop/{program-name}) -- **Pull Request**: #{pr_number} -- **State File**: [`{program-name}.md`](https://github.com/{owner}/{repo}/blob/memory/autoloop/{program-name}.md) +There are no separate "steering" or "experiment log" issues β€” they have all been collapsed into this one issue. 
-## Program +### Auto-Creation for File-Based Programs -**Goal**: {one-line summary from program.md} -**Metric**: {metric-name} ({higher/lower} is better) -**Current best**: {best_metric} -**Iterations**: {iteration_count} -``` +If `selected_issue` is `null` in `/tmp/gh-aw/autoloop.json`, the program is file-based **and** has no program issue yet. On the first run, create one with `create-issue`: -### Steering Issue Rules +- **Title**: `[Autoloop: {program-name}]` (the `[Autoloop] ` prefix is added automatically by the safe-output `title-prefix`, so pass the title as `{program-name}`). +- **Body**: the contents of the program file (`program.md`) plus a placeholder for the status comment so maintainers know one will be edited in place. +- **Labels**: `[autoloop-program, automation, autoloop]`. -- Create the steering issue on the **first accepted iteration** for the program if it does not already exist. -- **Update the issue body** whenever the best metric or PR number changes. -- **Add a comment** on each accepted iteration with a link to the commit and actions run. -- The steering issue is labeled `[automation, autoloop]`. -- Do NOT close the steering issue when the PR is merged β€” the branch continues to accumulate future iterations. +Record the new issue number in the state file's `Issue` field. On subsequent runs, the pre-step will discover the existing program issue (it scans open issues with the `autoloop-program` label) and `selected_issue` will be populated automatically. -## Issue-Based Program Updates - -When a program is defined via a GitHub issue (i.e., `selected_issue` is not null in `/tmp/gh-aw/autoloop.json`), the source issue itself serves as the program definition **and** as the primary interface for steering and monitoring the program. In addition to the normal iteration workflow (state file, steering issue, PR), you must also update the source issue. 
def check_skip_conditions(state):
    """Decide whether a program should be skipped this run.

    Args:
        state: Mapping of machine-state fields for one program. Keys read:
            ``completed``, ``paused``, ``pause_reason`` and
            ``recent_statuses``. Missing (or ``None``) values are treated as
            "not set".

    Returns:
        ``(should_skip, reason)``. ``should_skip`` is ``True`` when the
        program is completed, paused, or has plateaued (its last five
        recorded statuses are all ``"rejected"``); ``reason`` is then a short
        human-readable string. Otherwise ``(False, None)``.
    """
    # ``completed`` may arrive as a real bool or as its textual form;
    # str() normalises both (str(True).lower() == "true").
    if str(state.get("completed", "")).lower() == "true":
        return True, "completed: target metric reached"

    paused = state.get("paused")
    if isinstance(paused, str):
        # Plain truthiness would treat the text "false" as paused. Handle the
        # textual form the same way ``completed`` is handled above.
        paused = paused.strip().lower() not in ("", "false")
    if paused:
        # ``or`` (rather than a .get default) also covers an explicit None/""
        # value, which would otherwise render as "paused: None".
        return True, "paused: {}".format(state.get("pause_reason") or "unknown")

    # ``or []`` guards against an explicit None, which cannot be sliced.
    recent = list(state.get("recent_statuses") or [])[-5:]
    if len(recent) >= 5 and all(s == "rejected" for s in recent):
        return True, "plateau: 5 consecutive rejections"
    return False, None
+ """ + state_file = os.path.join(repo_memory_dir, "{}.md".format(program_name)) + try: + st = os.stat(state_file) + except OSError: + return 0 + return st.st_size + + +def _bootstrap_template_if_missing(): + """Create ``.autoloop/programs/example.md`` if the directory is missing.""" + if os.path.isdir(PROGRAMS_DIR): + return + os.makedirs(PROGRAMS_DIR, exist_ok=True) + bt = chr(96) # backtick β€” keep gh-aw compiler happy if this ever gets inlined + template = "\n".join([ + "", + "", + "", + "", + "# Autoloop Program", + "", + "", + "", + "## Goal", + "", + "", + "", + "REPLACE THIS with your optimization goal.", + "", + "## Target", + "", + "", + "", + "Only modify these files:", + "- {bt}REPLACE_WITH_FILE{bt} -- (describe what this file does)".format(bt=bt), + "", + "Do NOT modify:", + "- (list files that must not be touched)", + "", + "## Evaluation", + "", + "", + "", + "{bt}{bt}{bt}bash".format(bt=bt), + "REPLACE_WITH_YOUR_EVALUATION_COMMAND", + "{bt}{bt}{bt}".format(bt=bt), + "", + "The metric is {bt}REPLACE_WITH_METRIC_NAME{bt}. 
def _fetch_issue_programs(repo, github_token):
    """Fetch open ``autoloop-program`` issues and write their bodies to disk.

    Pages through the GitHub REST issues listing for ``repo`` (100 per page,
    following the ``Link`` header via ``parse_link_header``), skips entries
    that are pull requests, and writes each remaining issue body to
    ``ISSUE_PROGRAMS_DIR/<slug>.md``.

    Args:
        repo: ``owner/name`` string interpolated into the API URL.
        github_token: Token sent in the ``Authorization`` header.

    Returns:
        ``(program_files, issue_programs)``: the list of files written, and a
        dict mapping each slug to ``{"issue_number", "file", "title"}``.

    Errors are swallowed (with a warning) so a transient API failure doesn't
    block the run for non-issue-based programs; whatever was collected before
    the failure is still returned.
    """
    program_files = []
    issue_programs = {}
    os.makedirs(ISSUE_PROGRAMS_DIR, exist_ok=True)
    next_url = (
        "https://api.github.com/repos/{}/issues"
        "?labels=autoloop-program&state=open&per_page=100".format(repo)
    )
    headers = {
        "Authorization": "token {}".format(github_token),
        "Accept": "application/vnd.github.v3+json",
    }
    issues = []
    try:
        # Collect every page first, then process, so slug-collision handling
        # sees the issues in listing order.
        while next_url:
            req = urllib.request.Request(next_url, headers=headers)
            with urllib.request.urlopen(req, timeout=30) as resp:
                page = json.loads(resp.read().decode())
                link_header = resp.headers.get("link") or resp.headers.get("Link")
            issues.extend(page)
            next_url = parse_link_header(link_header)
        for issue in issues:
            if issue.get("pull_request"):
                continue  # skip PRs
            body = issue.get("body") or ""
            title = issue.get("title") or ""
            number = issue["number"]
            slug = slugify_issue_title(title, number)
            if slug in issue_programs:
                print(
                    "  Warning: slug '{}' (issue #{}) collides with issue #{}, "
                    "appending issue number".format(
                        slug, number, issue_programs[slug]["issue_number"]
                    )
                )
                slug = "{}-{}".format(slug, number)
            issue_file = os.path.join(ISSUE_PROGRAMS_DIR, "{}.md".format(slug))
            # Explicit UTF-8: issue bodies routinely contain non-ASCII text.
            # With the locale default encoding a single such body could raise
            # UnicodeEncodeError, which the handler below would swallow,
            # dropping every remaining issue program.
            with open(issue_file, "w", encoding="utf-8") as f:
                f.write(body)
            program_files.append(issue_file)
            issue_programs[slug] = {"issue_number": number, "file": issue_file, "title": title}
            print("  Found issue-based program: '{}' (issue #{})".format(slug, number))
    except Exception as e:  # noqa: BLE001 -- best-effort; logged below
        print("  Warning: could not fetch issue-based programs: {}".format(e))
    return program_files, issue_programs
Existing PR lookup (single-PR-per-program invariant) +# --------------------------------------------------------------------------- + + +def _http_get_json(url, headers, timeout=30): + """Open ``url`` and return ``(parsed_body, link_header)``. + + Returns ``(None, None)`` on any HTTP/network error so callers can fall + through to the next strategy. Broken out into a module-level helper so + tests can monkey-patch it without touching ``urllib`` directly. + """ + try: + req = urllib.request.Request(url, headers=headers) + with urllib.request.urlopen(req, timeout=timeout) as resp: + body = json.loads(resp.read().decode()) + link_header = resp.headers.get("link") or resp.headers.get("Link") + return body, link_header + except (urllib.error.URLError, urllib.error.HTTPError, ValueError, OSError): + return None, None + + +def find_existing_pr_for_branch(repo, program_name, github_token, http_get_json=_http_get_json): + """Look up the open draft PR (if any) for ``autoloop/{program_name}``. + + Returns the PR number, or ``None`` if none is found. + + The single-PR-per-program invariant requires that we never open a second + draft PR for the same program. The agent uses the returned ``existing_pr`` + to decide between ``create-pull-request`` (only if ``None``) and + ``push-to-pull-request-branch`` (always preferred when an open PR exists). + + We also tolerate legacy framework-suffixed branch names of the form + ``autoloop/{program}-<6-40 hex chars>`` so installations upgrading from + before ``preserve-branch-name: true`` was set find their in-flight PR + rather than opening a second one. + """ + if not repo or not program_name or not github_token: + return None + owner = repo.split("/", 1)[0] + canonical_branch = "autoloop/{}".format(program_name) + headers = { + "Authorization": "token {}".format(github_token), + "Accept": "application/vnd.github.v3+json", + } + # Strategy 1: exact canonical branch name via the head= filter. 
def select_program(due, forced_program=None, all_programs=None, unconfigured=None, issue_programs=None):
    """Pick the program to run.

    Args:
        due: List of dicts describing due programs. Keys read: ``name``,
            ``file``, ``last_run`` and (optionally) ``target_metric``.
        forced_program: Name of a program to run regardless of schedule, or a
            falsy value for normal scheduling.
        all_programs: Mapping of program name to program file path; consulted
            only for a forced selection.
        unconfigured: Names of programs that still contain placeholders.
        issue_programs: Mapping of program name to a dict holding
            ``issue_number`` for issue-based programs.

    Returns:
        ``(selected, selected_file, selected_issue, selected_target_metric,
        deferred, error)``. ``error`` is a string describing why a forced
        selection failed (and the caller should ``sys.exit(1)``); otherwise it
        is ``None``. When nothing is due and nothing is forced, every field is
        ``None`` except ``deferred``, which is ``[]``.
    """
    all_programs = all_programs or {}
    unconfigured = unconfigured or []
    issue_programs = issue_programs or {}

    def _issue_number(name):
        # Only issue-based programs have an entry here.
        return issue_programs[name]["issue_number"] if name in issue_programs else None

    if forced_program:
        if forced_program not in all_programs:
            return (
                None, None, None, None, [],
                "requested program '{}' not found. Available programs: {}".format(
                    forced_program, list(all_programs.keys())
                ),
            )
        if forced_program in unconfigured:
            return (
                None, None, None, None, [],
                "requested program '{}' is unconfigured (has placeholders).".format(
                    forced_program
                ),
            )
        selected_file = all_programs[forced_program]
        deferred = [p["name"] for p in due if p["name"] != forced_program]
        selected_target_metric = None
        for p in due:
            if p["name"] == forced_program:
                selected_target_metric = p.get("target_metric")
                break
        if selected_target_metric is None:
            # The forced program may not be in ``due`` (skipped / not yet
            # due), so fall back to re-reading its frontmatter.
            selected_target_metric = _parse_target_metric_from_file(selected_file)
        return (
            forced_program,
            selected_file,
            _issue_number(forced_program),
            selected_target_metric,
            deferred,
            None,
        )

    if not due:
        return None, None, None, None, [], None

    def _staleness_key(program):
        last_run = program.get("last_run")
        # Never-run programs (None/empty) sort first. Tagging them keeps the
        # placeholder from being compared against a real timestamp, which
        # would raise TypeError for non-string timestamps.
        return (0, "") if not last_run else (1, last_run)

    # Normal scheduling: pick the single most-overdue program; the rest are
    # deferred in staleness order.
    due_sorted = sorted(due, key=_staleness_key)
    head = due_sorted[0]
    selected = head["name"]
    deferred = [p["name"] for p in due_sorted[1:]]
    return (
        selected,
        head["file"],
        _issue_number(selected),
        head.get("target_metric"),
        deferred,
        None,
    )
Issue-based programs: GitHub issues with the 'autoloop-program' label + program_files = [] + program_files.extend(_scan_directory_programs()) + program_files.extend(_scan_bare_programs()) + issue_files, issue_programs = _fetch_issue_programs(repo, github_token) + program_files.extend(issue_files) + + if not program_files: + # Fallback to single-file locations + for path in [".autoloop/program.md", "program.md"]: + if os.path.isfile(path): + program_files = [path] + break + + os.makedirs(OUTPUT_DIR, exist_ok=True) + + if not program_files: + print("NO_PROGRAMS_FOUND") + with open(OUTPUT_FILE, "w") as f: + json.dump( + { + "due": [], + "skipped": [], + "unconfigured": [], + "no_programs": True, + "head_branch": None, + "existing_pr": None, + }, + f, + ) + sys.exit(0) + + now = datetime.now(timezone.utc) + due = [] + skipped = [] + unconfigured = [] + all_programs = {} # name -> file path + + for pf in program_files: + name = get_program_name(pf) + all_programs[name] = pf + with open(pf) as f: + content = f.read() + + if is_unconfigured(content): + unconfigured.append(name) + continue + + schedule_delta, target_metric, invalid_target = parse_program_frontmatter(content) + if invalid_target is not None: + print(" Warning: {} has invalid target-metric value: {}".format(name, invalid_target)) + + # Read state from repo-memory + state = read_program_state(name) + if state: + print( + " {}: last_run={}, iteration_count={}".format( + name, state.get("last_run"), state.get("iteration_count") + ) + ) + else: + print(" {}: no state found (first run)".format(name)) + + last_run = None + lr = state.get("last_run") + if lr: + try: + last_run = datetime.fromisoformat(lr.replace("Z", "+00:00")) + except ValueError: + pass + + should_skip, reason = check_skip_conditions(state) + if should_skip: + skipped.append({"name": name, "reason": reason}) + continue + + # Check if due based on per-program schedule + if schedule_delta and last_run and now - last_run < schedule_delta: + 
skipped.append( + { + "name": name, + "reason": "not due yet", + "next_due": (last_run + schedule_delta).isoformat(), + } + ) + continue + + due.append({"name": name, "last_run": lr, "file": pf, "target_metric": target_metric}) + + selected, selected_file, selected_issue, selected_target_metric, deferred, error = ( + select_program(due, forced_program, all_programs, unconfigured, issue_programs) + ) + + if error: + print("ERROR: {}".format(error)) + sys.exit(1) + + if forced_program and selected: + print("FORCED: running program '{}' (manual dispatch)".format(forced_program)) + + # Look up the existing draft PR (if any) for the selected program, so the + # agent can enforce the single-PR-per-program invariant: never call + # create-pull-request when a PR for autoloop/{name} already exists. + # head_branch is always the canonical name (no suffix, no hash). + head_branch = None + existing_pr = None + if selected: + head_branch = "autoloop/{}".format(selected) + try: + existing_pr = find_existing_pr_for_branch(repo, selected, github_token) + except Exception as e: # noqa: BLE001 -- best-effort lookup + print(" Warning: existing PR lookup failed for {}: {}".format(selected, e)) + existing_pr = None + + result = { + "selected": selected, + "selected_file": selected_file, + "selected_issue": selected_issue, + "selected_target_metric": selected_target_metric, + "state_file_size_bytes": get_state_file_size(selected) if selected else 0, + "state_file_max_bytes": STATE_FILE_MAX_BYTES, + "issue_programs": { + name: info["issue_number"] for name, info in issue_programs.items() + }, + "deferred": deferred, + "skipped": skipped, + "unconfigured": unconfigured, + "no_programs": False, + "head_branch": head_branch, + "existing_pr": existing_pr, + } + + with open(OUTPUT_FILE, "w") as f: + json.dump(result, f, indent=2) + + print("=== Autoloop Program Check ===") + print("Selected program: {} ({})".format(selected or "(none)", selected_file or "n/a")) + print("Deferred (next run): 
{}".format(deferred or "(none)")) + print("Programs skipped: {}".format([s["name"] for s in skipped] or "(none)")) + print("Programs unconfigured: {}".format(unconfigured or "(none)")) + + if not selected and not unconfigured: + print("\nNo programs due this run. Exiting early.") + sys.exit(1) # Non-zero exit skips the agent step + + +if __name__ == "__main__": + main() diff --git a/workflows/sync-branches.md b/workflows/sync-branches.md index 29bacc1..cad1adb 100644 --- a/workflows/sync-branches.md +++ b/workflows/sync-branches.md @@ -96,19 +96,69 @@ steps: git('fetch', 'origin', branch); git('fetch', 'origin', defaultBranch); - // Check out the program branch - let checkout = git('checkout', branch); - if (checkout.returncode !== 0) { - // Try creating a local tracking branch - checkout = git('checkout', '-b', branch, 'origin/' + branch); + // Compute ahead/behind counts using the remote-tracking refs so we + // make a decision based on commit delta (not content delta). + const aheadResult = git('rev-list', '--count', + 'origin/' + defaultBranch + '..origin/' + branch); + const behindResult = git('rev-list', '--count', + 'origin/' + branch + '..origin/' + defaultBranch); + if (aheadResult.returncode !== 0 || behindResult.returncode !== 0) { + console.log(' Failed to compute ahead/behind for ' + branch + ': ' + + (aheadResult.stderr || behindResult.stderr)); + failed.push(branch); + continue; + } + const ahead = parseInt((aheadResult.stdout || '0').trim(), 10) || 0; + const behind = parseInt((behindResult.stdout || '0').trim(), 10) || 0; + console.log(' ahead=' + ahead + ' behind=' + behind); + + if (ahead === 0 && behind > 0) { + // All of the branch's commits are already in the default branch. + // Merging would produce a noisy "Merge main into branch" commit + // that re-exposes every historical file as a patch touch β€” the + // failure mode that triggers gh-aw's E003 (>100 files) when a + // new PR is opened. Fast-forward the canonical branch instead. 
+ // This is lossless because ahead=0 proves every commit on the + // branch is already reachable from the default branch. + const ff = git('checkout', '-B', branch, 'origin/' + defaultBranch); + if (ff.returncode !== 0) { + console.log(' Failed to fast-forward ' + branch + ': ' + ff.stderr); + failed.push(branch); + continue; + } + // Use --force-with-lease so that if anyone else is simultaneously + // pushing to the branch, the update is rejected rather than + // overwriting their commits. + const push = git('push', '--force-with-lease', 'origin', branch); + if (push.returncode !== 0) { + console.log(' Failed to force-push ' + branch + ': ' + push.stderr); + failed.push(branch); + continue; + } + console.log(' Fast-forwarded ' + branch + ' to origin/' + defaultBranch); + continue; + } + + if (ahead === 0 && behind === 0) { + // Already at default branch β€” nothing to do. + console.log(' ' + branch + ' is already up to date with origin/' + defaultBranch); + continue; + } + + if (ahead > 0 && behind === 0) { + // Unique work preserved; no upstream drift to merge. + console.log(' ' + branch + ' is ahead of origin/' + defaultBranch + ' with no upstream drift; nothing to merge.'); + continue; } + + // True divergence (ahead > 0 && behind > 0): check out and merge. + let checkout = git('checkout', '-B', branch, 'origin/' + branch); if (checkout.returncode !== 0) { console.log(' Failed to checkout ' + branch + ': ' + checkout.stderr); failed.push(branch); continue; } - // Merge the default branch into the program branch const merge = git('merge', 'origin/' + defaultBranch, '--no-edit', '-m', 'Merge ' + defaultBranch + ' into ' + branch); if (merge.returncode !== 0) {