From 54336fbda8c42474c9c8decd9c573abad2cf19e9 Mon Sep 17 00:00:00 2001 From: Landon Cox Date: Sat, 4 Apr 2026 09:31:37 -0700 Subject: [PATCH] feat: add copilot-token-audit and rewrite copilot-token-optimizer Add a new daily copilot-token-audit workflow that: - Downloads 30 days of Copilot logs via gh aw logs --json - Computes per-workflow token usage, cost, and turn metrics - Persists daily snapshots to repo-memory for trend tracking - Generates charts and publishes an audit discussion Rewrite copilot-token-optimizer to: - Read audit snapshots from repo-memory to select targets - Use agentic-workflows MCP tools (logs, audit) for deep per-run analysis - Use gh aw CLI (installed via gh extension) for bulk log downloads - Analyze tool usage, cache efficiency, error patterns, and prompt size - Produce conservative, evidence-based optimization recommendations - Track optimization history to avoid re-analyzing recent targets Remove the old copilot-token-usage-analyzer (replaced by copilot-token-audit). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ....lock.yml => copilot-token-audit.lock.yml} | 346 ++++++++++--- .github/workflows/copilot-token-audit.md | 222 +++++++++ .../copilot-token-optimizer.lock.yml | 325 +++++++++---- .github/workflows/copilot-token-optimizer.md | 457 ++++++++---------- .../workflows/copilot-token-usage-analyzer.md | 409 ---------------- 5 files changed, 911 insertions(+), 848 deletions(-) rename .github/workflows/{copilot-token-usage-analyzer.lock.yml => copilot-token-audit.lock.yml} (81%) create mode 100644 .github/workflows/copilot-token-audit.md delete mode 100644 .github/workflows/copilot-token-usage-analyzer.md diff --git a/.github/workflows/copilot-token-usage-analyzer.lock.yml b/.github/workflows/copilot-token-audit.lock.yml similarity index 81% rename from .github/workflows/copilot-token-usage-analyzer.lock.yml rename to .github/workflows/copilot-token-audit.lock.yml index f3edcd2f792..ccbd5bef3c8 100644 --- a/.github/workflows/copilot-token-usage-analyzer.lock.yml +++ b/.github/workflows/copilot-token-audit.lock.yml @@ -20,23 +20,23 @@ # # For more information: https://github.github.com/gh-aw/introduction/overview/ # -# Daily analysis of Copilot token consumption across all agentic workflows, creating a usage report issue with per-workflow statistics and optimization opportunities +# Daily audit of Copilot token usage across all agentic workflows with historical trend tracking # # Resolved workflow manifest: # Imports: +# - copilot-setup-steps.yml +# - shared/daily-audit-discussion.md # - shared/python-dataviz.md +# - shared/repo-memory-standard.md # - shared/reporting.md -# - shared/token-logs-24h.md -# - shared/trends.md -# - shared/charts-with-trending.md # -# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"ec24173efbde44f513eddb2473dda09e0952eac09cbefd2d0d5c293ab21069c9","strict":true,"agent_id":"copilot"} +# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"98b2c43eb0329c16327d3d530267797b8feb0a46a34b09fc5630cabb2f79c5b7","strict":true,"agent_id":"copilot"} -name: "Copilot Token Usage Analyzer" +name: "Daily Copilot Token Usage Audit" "on": schedule: - - cron: "46 8 * * 1-5" - # Friendly format: daily around 09:00 on weekdays (scattered) + - cron: "21 11 * * 1-5" + # Friendly format: daily around 12:00 on weekdays (scattered) workflow_dispatch: inputs: aw_context: @@ -50,7 +50,7 @@ permissions: {} concurrency: group: "gh-aw-${{ github.workflow }}" -run-name: "Copilot Token Usage Analyzer" +run-name: "Daily 
Copilot Token Usage Audit" jobs: activation: @@ -82,7 +82,7 @@ jobs: GH_AW_INFO_MODEL: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || 'auto' }} GH_AW_INFO_VERSION: "latest" GH_AW_INFO_AGENT_VERSION: "latest" - GH_AW_INFO_WORKFLOW_NAME: "Copilot Token Usage Analyzer" + GH_AW_INFO_WORKFLOW_NAME: "Daily Copilot Token Usage Audit" GH_AW_INFO_EXPERIMENTAL: "false" GH_AW_INFO_SUPPORTS_TOOLS_ALLOWLIST: "true" GH_AW_INFO_STAGED: "false" @@ -112,7 +112,7 @@ jobs: - name: Check workflow lock file uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 env: - GH_AW_WORKFLOW_FILE: "copilot-token-usage-analyzer.lock.yml" + GH_AW_WORKFLOW_FILE: "copilot-token-audit.lock.yml" GH_AW_CONTEXT_WORKFLOW_REF: "${{ github.workflow_ref }}" with: script: | @@ -132,21 +132,24 @@ jobs: GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} + GH_AW_WIKI_NOTE: ${{ '' }} # poutine:ignore untrusted_checkout_exec run: | bash ${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh { - cat << 'GH_AW_PROMPT_413b258b2f495f5e_EOF' + cat << 'GH_AW_PROMPT_58a00121d60b1539_EOF' - GH_AW_PROMPT_413b258b2f495f5e_EOF + GH_AW_PROMPT_58a00121d60b1539_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md" cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md" cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md" + cat "${RUNNER_TEMP}/gh-aw/prompts/agentic_workflows_guide.md" cat "${RUNNER_TEMP}/gh-aw/prompts/cache_memory_prompt.md" + cat "${RUNNER_TEMP}/gh-aw/prompts/repo_memory_prompt.md" cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md" - cat << 'GH_AW_PROMPT_413b258b2f495f5e_EOF' + cat << 'GH_AW_PROMPT_58a00121d60b1539_EOF' - Tools: create_issue, upload_asset, missing_tool, missing_data, noop + Tools: create_discussion, upload_asset, missing_tool, missing_data, noop upload_asset: provide a file path; returns a URL; assets are published after the workflow completes (safeoutputs). 
@@ -178,23 +181,19 @@ jobs: {{/if}} - GH_AW_PROMPT_413b258b2f495f5e_EOF + GH_AW_PROMPT_58a00121d60b1539_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md" - cat << 'GH_AW_PROMPT_413b258b2f495f5e_EOF' + cat << 'GH_AW_PROMPT_58a00121d60b1539_EOF' - {{#runtime-import .github/workflows/shared/token-logs-24h.md}} {{#runtime-import .github/workflows/shared/reporting.md}} - {{#runtime-import .github/workflows/shared/charts-with-trending.md}} {{#runtime-import .github/workflows/shared/python-dataviz.md}} - {{#runtime-import .github/workflows/shared/trends.md}} - {{#runtime-import .github/workflows/copilot-token-usage-analyzer.md}} - GH_AW_PROMPT_413b258b2f495f5e_EOF + {{#runtime-import .github/workflows/copilot-token-audit.md}} + GH_AW_PROMPT_58a00121d60b1539_EOF } > "$GH_AW_PROMPT" - name: Interpolate variables and render templates uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 env: GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt - GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} with: script: | const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); @@ -216,6 +215,12 @@ jobs: GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} + GH_AW_MEMORY_BRANCH_NAME: 'memory/token-audit' + GH_AW_MEMORY_CONSTRAINTS: "\n\n**Constraints:**\n- **Allowed Files**: Only files matching patterns: memory/token-audit/*.json, memory/token-audit/*.jsonl, memory/token-audit/*.csv, memory/token-audit/*.md\n- **Max File Size**: 102400 bytes (0.10 MB) per file\n- **Max File Count**: 100 files per commit\n- **Max Patch Size**: 10240 bytes (10 KB) total per push (max: 100 KB)\n" + GH_AW_MEMORY_DESCRIPTION: ' Historical daily Copilot token usage snapshots' + GH_AW_MEMORY_DIR: '/tmp/gh-aw/repo-memory/default/' + GH_AW_MEMORY_TARGET_REPO: ' of the current repository' + GH_AW_WIKI_NOTE: '' with: script: | const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); @@ -237,7 +242,13 @@ jobs: GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: process.env.GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER, GH_AW_GITHUB_REPOSITORY: process.env.GH_AW_GITHUB_REPOSITORY, GH_AW_GITHUB_RUN_ID: process.env.GH_AW_GITHUB_RUN_ID, - GH_AW_GITHUB_WORKSPACE: process.env.GH_AW_GITHUB_WORKSPACE + GH_AW_GITHUB_WORKSPACE: process.env.GH_AW_GITHUB_WORKSPACE, + GH_AW_MEMORY_BRANCH_NAME: process.env.GH_AW_MEMORY_BRANCH_NAME, + GH_AW_MEMORY_CONSTRAINTS: process.env.GH_AW_MEMORY_CONSTRAINTS, + GH_AW_MEMORY_DESCRIPTION: process.env.GH_AW_MEMORY_DESCRIPTION, + GH_AW_MEMORY_DIR: process.env.GH_AW_MEMORY_DIR, + GH_AW_MEMORY_TARGET_REPO: process.env.GH_AW_MEMORY_TARGET_REPO, + GH_AW_WIKI_NOTE: process.env.GH_AW_WIKI_NOTE } }); - name: Validate prompt placeholders @@ -277,7 +288,7 @@ jobs: GH_AW_ASSETS_BRANCH: "assets/${{ github.workflow }}" GH_AW_ASSETS_MAX_SIZE_KB: 10240 GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs - GH_AW_WORKFLOW_ID_SANITIZED: copilottokenusageanalyzer + GH_AW_WORKFLOW_ID_SANITIZED: copilottokenaudit outputs: checkout_pr_success: ${{ steps.checkout-pr.outputs.checkout_pr_success || 'true' }} effective_tokens: ${{ steps.parse-mcp-gateway.outputs.effective_tokens }} @@ -304,24 +315,50 @@ jobs: echo "GH_AW_SAFE_OUTPUTS=${RUNNER_TEMP}/gh-aw/safeoutputs/outputs.jsonl" >> "$GITHUB_OUTPUT" echo "GH_AW_SAFE_OUTPUTS_CONFIG_PATH=${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" >> "$GITHUB_OUTPUT" echo 
"GH_AW_SAFE_OUTPUTS_TOOLS_PATH=${RUNNER_TEMP}/gh-aw/safeoutputs/tools.json" >> "$GITHUB_OUTPUT" - - name: Checkout repository - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - - name: Setup Python - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 - with: - python-version: '3.12' - name: Create gh-aw temp directory run: bash ${RUNNER_TEMP}/gh-aw/actions/create_gh_aw_tmp_dir.sh - name: Configure gh CLI for GitHub Enterprise run: bash ${RUNNER_TEMP}/gh-aw/actions/configure_gh_for_ghe.sh env: GH_TOKEN: ${{ github.token }} + - name: Checkout code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Setup Go + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 + with: + go-version: '1.25' + cache: false + - name: Capture GOROOT for AWF chroot mode + run: echo "GOROOT=$(go env GOROOT)" >> "$GITHUB_ENV" + - name: Setup Node.js + uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0 + with: + node-version: '24' + cache: 'npm' + cache-dependency-path: 'actions/setup/js/package-lock.json' + package-manager-cache: false + - name: Setup Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: '3.12' + - name: Install gh-aw extension + run: curl -fsSL https://raw.githubusercontent.com/github/gh-aw/refs/heads/main/install-gh-aw.sh | bash + - name: Install npm dependencies + run: cd actions/setup/js && npm ci + - name: Install development dependencies + run: make deps-dev + - name: Build code + run: make build - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Restore 24h token logs from cache - run: "set -euo pipefail\nTOKEN_LOGS_DIR=\"/tmp/gh-aw/token-logs\"\nmkdir -p \"$TOKEN_LOGS_DIR\"\nTODAY=$(date -u +%Y-%m-%d)\n\n# Look for today's pre-fetched data from the Token Logs Fetch workflow\nFETCH_RUN_ID=$(gh run list \\\n --workflow \"token-logs-fetch.lock.yml\" \\\n --status success \\\n --limit 1 \\\n --json databaseId \\\n --jq '.[0].databaseId' 2>/dev/null || echo \"\")\n\nUSED_CACHE=false\nif [ -n \"$FETCH_RUN_ID\" ]; then\n CACHE_TMP=\"/tmp/gh-aw/token-logs-fetch-cache\"\n mkdir -p \"$CACHE_TMP\"\n gh run download \"$FETCH_RUN_ID\" \\\n --repo \"$GITHUB_REPOSITORY\" \\\n --name \"cache-memory\" \\\n --dir \"$CACHE_TMP\" \\\n 2>/dev/null || true\n CACHE_DATE=$(cat \"$CACHE_TMP/token-logs/fetch-date.txt\" 2>/dev/null || echo \"\")\n if [ \"$CACHE_DATE\" = \"$TODAY\" ] && \\\n [ -s \"$CACHE_TMP/token-logs/copilot-runs.json\" ] && \\\n [ -s \"$CACHE_TMP/token-logs/claude-runs.json\" ]; then\n echo \"โœ… Using pre-fetched logs from Token Logs Fetch run $FETCH_RUN_ID (date: $CACHE_DATE)\"\n cp \"$CACHE_TMP/token-logs/copilot-runs.json\" \"$TOKEN_LOGS_DIR/copilot-runs.json\"\n cp \"$CACHE_TMP/token-logs/claude-runs.json\" \"$TOKEN_LOGS_DIR/claude-runs.json\"\n USED_CACHE=true\n else\n echo \"โ„น๏ธ No valid cached logs found (cache date: ${CACHE_DATE:-none}, today: $TODAY)\"\n fi\nfi\n\nif [ \"$USED_CACHE\" != \"true\" ]; then\n echo \"๐Ÿ“ฅ Downloading Copilot and Claude workflow runs from last 24 hours...\"\n\n # Ensure gh-aw CLI is installed โ€” this shared step runs before user-defined steps.\n # Install failure is non-fatal to match the fallback-safe behavior of gh aw logs below.\n GH_AW_AVAILABLE=false\n if gh extension list 2>/dev/null | grep -q \"github/gh-aw\"; then\n GH_AW_AVAILABLE=true\n else\n echo \"๐Ÿ“ฆ Installing gh-aw CLI extension...\"\n if gh extension install 
github/gh-aw 2>/dev/null; then\n GH_AW_AVAILABLE=true\n else\n echo \"โš ๏ธ Failed to install gh-aw CLI extension; continuing with empty token logs.\"\n fi\n fi\n\n # Check GitHub API rate limit before downloading logs\n RATE_INFO=$(gh api rate_limit --jq '.rate | \"\\(.remaining)/\\(.limit) (resets \\(.reset | todate))\"' 2>/dev/null || echo \"unknown\")\n echo \"๐Ÿ“Š GitHub API rate limit before download: $RATE_INFO\"\n\n if [ \"$GH_AW_AVAILABLE\" = \"true\" ]; then\n LOGS_STDERR=\"/tmp/token-logs-copilot-stderr.log\"\n gh aw logs \\\n --engine copilot \\\n --start-date -1d \\\n --json \\\n -c 300 \\\n > /tmp/token-logs-copilot-raw.json 2>\"$LOGS_STDERR\"\n COPILOT_EXIT=$?\n if [ \"$COPILOT_EXIT\" -ne 0 ]; then\n echo \"โš ๏ธ gh aw logs --engine copilot exited with code $COPILOT_EXIT\"\n cat \"$LOGS_STDERR\" | tail -5\n echo '{\"runs\":[]}' > /tmp/token-logs-copilot-raw.json\n fi\n else\n echo '{\"runs\":[]}' > /tmp/token-logs-copilot-raw.json\n fi\n jq '.runs // []' /tmp/token-logs-copilot-raw.json > \"$TOKEN_LOGS_DIR/copilot-runs.json\" 2>/dev/null || echo \"[]\" > \"$TOKEN_LOGS_DIR/copilot-runs.json\"\n\n if [ \"$GH_AW_AVAILABLE\" = \"true\" ]; then\n LOGS_STDERR=\"/tmp/token-logs-claude-stderr.log\"\n gh aw logs \\\n --engine claude \\\n --start-date -1d \\\n --json \\\n -c 300 \\\n > /tmp/token-logs-claude-raw.json 2>\"$LOGS_STDERR\"\n CLAUDE_EXIT=$?\n if [ \"$CLAUDE_EXIT\" -ne 0 ]; then\n echo \"โš ๏ธ gh aw logs --engine claude exited with code $CLAUDE_EXIT\"\n cat \"$LOGS_STDERR\" | tail -5\n echo '{\"runs\":[]}' > /tmp/token-logs-claude-raw.json\n fi\n else\n echo '{\"runs\":[]}' > /tmp/token-logs-claude-raw.json\n fi\n jq '.runs // []' /tmp/token-logs-claude-raw.json > \"$TOKEN_LOGS_DIR/claude-runs.json\" 2>/dev/null || echo \"[]\" > \"$TOKEN_LOGS_DIR/claude-runs.json\"\n\n # Check rate limit after downloads to see consumption\n RATE_INFO=$(gh api rate_limit --jq '.rate | \"\\(.remaining)/\\(.limit) (resets \\(.reset | todate))\"' 2>/dev/null || echo \"unknown\")\n echo \"๐Ÿ“Š GitHub API rate limit after download: $RATE_INFO\"\nfi\n\necho \"โœ… Copilot runs: $(jq 'length' \"$TOKEN_LOGS_DIR/copilot-runs.json\")\"\necho \"โœ… Claude runs: $(jq 'length' \"$TOKEN_LOGS_DIR/claude-runs.json\")\"" + GH_TOKEN: ${{ github.token }} + name: Recompile workflows + run: make recompile || true + - name: Setup uv + uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # eac588ad8def6316056a12d4907a9d4d84ff7a3b + - name: Install Go language server (gopls) + run: go install golang.org/x/tools/gopls@latest + - name: Install TypeScript language server + run: npm install -g typescript-language-server typescript - name: Setup Python environment run: "# Create working directory for Python scripts\nmkdir -p /tmp/gh-aw/python\nmkdir -p /tmp/gh-aw/python/data\nmkdir -p /tmp/gh-aw/python/charts\nmkdir -p /tmp/gh-aw/python/artifacts\n\necho \"Python environment setup complete\"\necho \"Working directory: /tmp/gh-aw/python\"\necho \"Data directory: /tmp/gh-aw/python/data\"\necho \"Charts directory: /tmp/gh-aw/python/charts\"\necho \"Artifacts directory: /tmp/gh-aw/python/artifacts\"\n" - name: Install Python scientific libraries @@ -356,8 +393,8 @@ jobs: gh aw --version - env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Download Copilot workflow runs (last 24h) - run: "set -euo pipefail\nmkdir -p /tmp/token-analyzer\n\n# Use pre-fetched logs from the shared token-logs-24h pre-step\ncp /tmp/gh-aw/token-logs/copilot-runs.json /tmp/token-analyzer/copilot-runs.json 2>/dev/null || echo \"[]\" > 
/tmp/token-analyzer/copilot-runs.json\n\nRUN_COUNT=$(jq 'length' /tmp/token-analyzer/copilot-runs.json 2>/dev/null || echo 0)\necho \"โœ… Found ${RUN_COUNT} Copilot workflow runs\"\n\n# Download token-usage.jsonl artifacts for per-model breakdown\n# We look for the firewall-audit-logs artifact which contains token-usage.jsonl\nARTIFACT_DIR=\"/tmp/token-analyzer/artifacts\"\nmkdir -p \"$ARTIFACT_DIR\"\n\necho \"๐Ÿ“ฅ Downloading token-usage.jsonl artifacts...\"\njq -r '.[0:50][]?.database_id' /tmp/token-analyzer/copilot-runs.json 2>/dev/null > /tmp/token-analyzer/run-ids.txt || true\nwhile read -r run_id; do\n run_dir=\"$ARTIFACT_DIR/$run_id\"\n mkdir -p \"$run_dir\"\n gh run download \"$run_id\" \\\n --repo \"$GITHUB_REPOSITORY\" \\\n --name \"firewall-audit-logs\" \\\n --dir \"$run_dir\" \\\n 2>/dev/null || true\ndone < /tmp/token-analyzer/run-ids.txt\n\n# Count how many token-usage.jsonl files we got\nJSONL_COUNT=$(find \"$ARTIFACT_DIR\" -name \"token-usage.jsonl\" 2>/dev/null | wc -l)\necho \"โœ… Downloaded ${JSONL_COUNT} token-usage.jsonl artifacts\"\n\n# Merge all token-usage.jsonl files into a single aggregate file annotated with run_id\nMERGED_FILE=\"/tmp/token-analyzer/token-usage-merged.jsonl\"\n> \"$MERGED_FILE\"\nfind \"$ARTIFACT_DIR\" -name \"token-usage.jsonl\" > /tmp/token-analyzer/jsonl-files.txt 2>/dev/null || true\nwhile read -r f; do\n run_id=$(echo \"$f\" | grep -oP '(?<=/artifacts/)\\d+(?=/)' || true)\n while IFS= read -r line; do\n if [ -n \"$line\" ]; then\n echo \"${line}\" | jq --arg run_id \"$run_id\" '. + {run_id: $run_id}' >> \"$MERGED_FILE\" 2>/dev/null || true\n fi\n done < \"$f\"\ndone < /tmp/token-analyzer/jsonl-files.txt\n\nRECORD_COUNT=$(wc -l < \"$MERGED_FILE\" 2>/dev/null || echo 0)\necho \"โœ… Merged ${RECORD_COUNT} token usage records\"\n" + name: Download Copilot workflow logs + run: "set -euo pipefail\nmkdir -p /tmp/gh-aw/token-audit\n\n# Download last 30 days of Copilot logs as JSON\ngh aw logs \\\n --engine copilot \\\n --start-date -30d \\\n --json \\\n -c 500 \\\n > /tmp/gh-aw/token-audit/copilot-logs.json\n\nTOTAL=$(jq '.runs | length' /tmp/gh-aw/token-audit/copilot-logs.json)\necho \"โœ… Downloaded $TOTAL Copilot workflow runs (last 30 days)\"\n" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory @@ -374,6 +411,16 @@ jobs: GH_AW_CACHE_DIR: /tmp/gh-aw/cache-memory GH_AW_MIN_INTEGRITY: none run: bash ${RUNNER_TEMP}/gh-aw/actions/setup_cache_memory_git.sh + # Repo memory git-based storage configuration from frontmatter processed below + - name: Clone repo-memory branch (default) + env: + GH_TOKEN: ${{ github.token }} + GITHUB_SERVER_URL: ${{ github.server_url }} + BRANCH_NAME: memory/token-audit + TARGET_REPO: ${{ github.repository }} + MEMORY_DIR: /tmp/gh-aw/repo-memory/default + CREATE_ORPHAN: true + run: bash ${RUNNER_TEMP}/gh-aw/actions/clone_repo_memory_branch.sh - name: Configure Git credentials env: REPO_NAME: ${{ github.repository }} @@ -418,29 +465,53 @@ jobs: await determineAutomaticLockdown(github, context, core); - name: Download container images run: bash ${RUNNER_TEMP}/gh-aw/actions/download_docker_images.sh ghcr.io/github/gh-aw-firewall/agent:0.25.13 ghcr.io/github/gh-aw-firewall/api-proxy:0.25.13 ghcr.io/github/gh-aw-firewall/squid:0.25.13 ghcr.io/github/gh-aw-mcpg:v0.2.12 ghcr.io/github/github-mcp-server:v0.32.0 node:lts-alpine + - name: Install gh-aw extension + env: + GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN 
}} + run: | + # Check if gh-aw extension is already installed + if gh extension list | grep -q "github/gh-aw"; then + echo "gh-aw extension already installed, upgrading..." + gh extension upgrade gh-aw || true + else + echo "Installing gh-aw extension..." + gh extension install github/gh-aw + fi + gh aw --version + # Copy the gh-aw binary to ${RUNNER_TEMP}/gh-aw for MCP server containerization + mkdir -p ${RUNNER_TEMP}/gh-aw + GH_AW_BIN=$(which gh-aw 2>/dev/null || find ~/.local/share/gh/extensions/gh-aw -name 'gh-aw' -type f 2>/dev/null | head -1) + if [ -n "$GH_AW_BIN" ] && [ -f "$GH_AW_BIN" ]; then + cp "$GH_AW_BIN" ${RUNNER_TEMP}/gh-aw/gh-aw + chmod +x ${RUNNER_TEMP}/gh-aw/gh-aw + echo "Copied gh-aw binary to ${RUNNER_TEMP}/gh-aw/gh-aw" + else + echo "::error::Failed to find gh-aw binary for MCP server" + exit 1 + fi - name: Write Safe Outputs Config run: | mkdir -p ${RUNNER_TEMP}/gh-aw/safeoutputs mkdir -p /tmp/gh-aw/safeoutputs mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs - cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_73e86c03c0e9b7fc_EOF' - {"create_issue":{"close_older_issues":true,"expires":48,"labels":["automated-analysis","token-usage","copilot"],"max":1,"title_prefix":"๐Ÿ“Š Copilot Token Usage Report: "},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"true"},"upload_asset":{"allowed-exts":[".png",".jpg",".jpeg"],"branch":"assets/${{ github.workflow }}","max-size":10240}} - GH_AW_SAFE_OUTPUTS_CONFIG_73e86c03c0e9b7fc_EOF + cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_c30dea8b77533504_EOF' + {"create_discussion":{"category":"audits","close_older_discussions":true,"expires":72,"fallback_to_issue":true,"max":1,"title_prefix":"[copilot-token-audit] "},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"true"},"push_repo_memory":{"memories":[{"dir":"/tmp/gh-aw/repo-memory/default","id":"default","max_file_count":100,"max_file_size":102400,"max_patch_size":10240}]},"upload_asset":{"allowed-exts":[".png",".jpg",".jpeg"],"branch":"assets/${{ github.workflow }}","max-size":10240}} + GH_AW_SAFE_OUTPUTS_CONFIG_c30dea8b77533504_EOF - name: Write Safe Outputs Tools run: | - cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/tools_meta.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_META_257eeceb021791c5_EOF' + cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/tools_meta.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_META_85b5d925931c7354_EOF' { "description_suffixes": { - "create_issue": " CONSTRAINTS: Maximum 1 issue(s) can be created. Title will be prefixed with \"๐Ÿ“Š Copilot Token Usage Report: \". Labels [\"automated-analysis\" \"token-usage\" \"copilot\"] will be automatically added.", + "create_discussion": " CONSTRAINTS: Maximum 1 discussion(s) can be created. Title will be prefixed with \"[copilot-token-audit] \". Discussions will be created in category \"audits\".", "upload_asset": " CONSTRAINTS: Maximum file size: 10240KB. Allowed file extensions: [.png .jpg .jpeg]." 
}, "repo_params": {}, "dynamic_tools": [] } - GH_AW_SAFE_OUTPUTS_TOOLS_META_257eeceb021791c5_EOF - cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_e43cfe60c6227119_EOF' + GH_AW_SAFE_OUTPUTS_TOOLS_META_85b5d925931c7354_EOF + cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_a4a791c1bf13b823_EOF' { - "create_issue": { + "create_discussion": { "defaultMax": 1, "fields": { "body": { @@ -449,22 +520,15 @@ jobs: "sanitize": true, "maxLength": 65000 }, - "labels": { - "type": "array", - "itemType": "string", - "itemSanitize": true, - "itemMaxLength": 128 - }, - "parent": { - "issueOrPRNumber": true + "category": { + "type": "string", + "sanitize": true, + "maxLength": 128 }, "repo": { "type": "string", "maxLength": 256 }, - "temporary_id": { - "type": "string" - }, "title": { "required": true, "type": "string", @@ -540,7 +604,7 @@ jobs: } } } - GH_AW_SAFE_OUTPUTS_VALIDATION_e43cfe60c6227119_EOF + GH_AW_SAFE_OUTPUTS_VALIDATION_a4a791c1bf13b823_EOF node ${RUNNER_TEMP}/gh-aw/actions/generate_safe_outputs_tools.cjs - name: Generate Safe Outputs MCP Server Config id: safe-outputs-config @@ -594,6 +658,7 @@ jobs: GITHUB_MCP_GUARD_MIN_INTEGRITY: ${{ steps.determine-automatic-lockdown.outputs.min_integrity }} GITHUB_MCP_GUARD_REPOS: ${{ steps.determine-automatic-lockdown.outputs.repos }} GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | set -eo pipefail mkdir -p /tmp/gh-aw/mcp-config @@ -613,9 +678,28 @@ jobs: export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.12' mkdir -p /home/runner/.copilot - cat << GH_AW_MCP_CONFIG_2ec21b6ef0e2110c_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh + cat << GH_AW_MCP_CONFIG_e77bc9e658c505d1_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh { "mcpServers": { + "agenticworkflows": { + "type": "stdio", + "container": "localhost/gh-aw:dev", + "mounts": ["\${GITHUB_WORKSPACE}:\${GITHUB_WORKSPACE}:rw", "/tmp/gh-aw:/tmp/gh-aw:rw"], + "args": ["--network", "host", "-w", "\${GITHUB_WORKSPACE}"], + "env": { + "DEBUG": "*", + "GITHUB_TOKEN": "\${GITHUB_TOKEN}", + "GITHUB_ACTOR": "\${GITHUB_ACTOR}", + "GITHUB_REPOSITORY": "\${GITHUB_REPOSITORY}" + }, + "guard-policies": { + "write-sink": { 
+ "accept": [ + "*" + ] + } + } + }, "github": { "type": "stdio", "container": "ghcr.io/github/github-mcp-server:v0.32.0", @@ -623,7 +707,7 @@ jobs: "GITHUB_HOST": "\${GITHUB_SERVER_URL}", "GITHUB_PERSONAL_ACCESS_TOKEN": "\${GITHUB_MCP_SERVER_TOKEN}", "GITHUB_READ_ONLY": "1", - "GITHUB_TOOLSETS": "context,repos,issues,pull_requests,actions" + "GITHUB_TOOLSETS": "context,repos,issues,pull_requests" }, "guard-policies": { "allow-only": { @@ -654,7 +738,7 @@ jobs: "payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}" } } - GH_AW_MCP_CONFIG_2ec21b6ef0e2110c_EOF + GH_AW_MCP_CONFIG_e77bc9e658c505d1_EOF - name: Download activation artifact uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: @@ -666,7 +750,7 @@ jobs: - name: Execute GitHub Copilot CLI id: agentic_execution # Copilot CLI tool arguments (sorted): - timeout-minutes: 30 + timeout-minutes: 25 run: | set -o pipefail touch /tmp/gh-aw/agent-step-summary.md @@ -820,6 +904,15 @@ jobs: if [ ! -f /tmp/gh-aw/agent_output.json ]; then echo '{"items":[]}' > /tmp/gh-aw/agent_output.json fi + # Upload repo memory as artifacts for push job + - name: Upload repo-memory artifact (default) + if: always() + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + with: + name: repo-memory-default + path: /tmp/gh-aw/repo-memory/default + retention-days: 1 + if-no-files-found: ignore - name: Commit cache-memory changes if: always() env: @@ -875,6 +968,7 @@ jobs: - activation - agent - detection + - push_repo_memory - safe_outputs - update_cache_memory - upload_assets @@ -882,9 +976,10 @@ jobs: runs-on: ubuntu-slim permissions: contents: write + discussions: write issues: write concurrency: - group: "gh-aw-conclusion-copilot-token-usage-analyzer" + group: "gh-aw-conclusion-copilot-token-audit" cancel-in-progress: false outputs: noop_message: ${{ steps.noop.outputs.noop_message }} @@ -922,7 +1017,8 @@ jobs: env: GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} GH_AW_NOOP_MAX: "1" - GH_AW_WORKFLOW_NAME: "Copilot Token Usage Analyzer" + GH_AW_WORKFLOW_NAME: "Daily Copilot Token Usage Audit" + GH_AW_TRACKER_ID: "copilot-token-audit" GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} GH_AW_NOOP_REPORT_AS_ISSUE: "true" @@ -939,7 +1035,8 @@ jobs: env: GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} GH_AW_MISSING_TOOL_CREATE_ISSUE: "true" - GH_AW_WORKFLOW_NAME: "Copilot Token Usage Analyzer" + GH_AW_WORKFLOW_NAME: "Daily Copilot Token Usage Audit" + GH_AW_TRACKER_ID: "copilot-token-audit" with: github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} script: | @@ -953,17 +1050,24 @@ jobs: uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 env: GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} - GH_AW_WORKFLOW_NAME: "Copilot Token Usage Analyzer" + GH_AW_WORKFLOW_NAME: "Daily Copilot Token Usage Audit" + GH_AW_TRACKER_ID: "copilot-token-audit" GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} - GH_AW_WORKFLOW_ID: "copilot-token-usage-analyzer" + GH_AW_WORKFLOW_ID: "copilot-token-audit" GH_AW_ENGINE_ID: "copilot" GH_AW_CHECKOUT_PR_SUCCESS: ${{ needs.agent.outputs.checkout_pr_success }} GH_AW_INFERENCE_ACCESS_ERROR: ${{ needs.agent.outputs.inference_access_error }} + 
GH_AW_CREATE_DISCUSSION_ERRORS: ${{ needs.safe_outputs.outputs.create_discussion_errors }} + GH_AW_CREATE_DISCUSSION_ERROR_COUNT: ${{ needs.safe_outputs.outputs.create_discussion_error_count }} GH_AW_LOCKDOWN_CHECK_FAILED: ${{ needs.activation.outputs.lockdown_check_failed }} + GH_AW_PUSH_REPO_MEMORY_RESULT: ${{ needs.push_repo_memory.result }} + GH_AW_REPO_MEMORY_VALIDATION_FAILED_default: ${{ needs.push_repo_memory.outputs.validation_failed_default }} + GH_AW_REPO_MEMORY_VALIDATION_ERROR_default: ${{ needs.push_repo_memory.outputs.validation_error_default }} + GH_AW_REPO_MEMORY_PATCH_SIZE_EXCEEDED_default: ${{ needs.push_repo_memory.outputs.patch_size_exceeded_default }} GH_AW_GROUP_REPORTS: "false" GH_AW_FAILURE_REPORT_AS_ISSUE: "true" - GH_AW_TIMEOUT_MINUTES: "30" + GH_AW_TIMEOUT_MINUTES: "25" with: github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} script: | @@ -1055,8 +1159,8 @@ jobs: if: always() && steps.detection_guard.outputs.run_detection == 'true' uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 env: - WORKFLOW_NAME: "Copilot Token Usage Analyzer" - WORKFLOW_DESCRIPTION: "Daily analysis of Copilot token consumption across all agentic workflows, creating a usage report issue with per-workflow statistics and optimization opportunities" + WORKFLOW_NAME: "Daily Copilot Token Usage Audit" + WORKFLOW_DESCRIPTION: "Daily audit of Copilot token usage across all agentic workflows with historical trend tracking" HAS_PATCH: ${{ needs.agent.outputs.has_patch }} with: script: | @@ -1126,6 +1230,91 @@ jobs: const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_threat_detection_results.cjs'); await main(); + push_repo_memory: + needs: + - agent + - detection + if: > + always() && (needs.detection.result == 'success' || needs.detection.result == 'skipped') && + needs.agent.result == 'success' + runs-on: ubuntu-slim + permissions: + contents: write + concurrency: + group: "push-repo-memory-${{ github.repository }}|memory/token-audit" + cancel-in-progress: false + outputs: + patch_size_exceeded_default: ${{ steps.push_repo_memory_default.outputs.patch_size_exceeded }} + validation_error_default: ${{ steps.push_repo_memory_default.outputs.validation_error }} + validation_failed_default: ${{ steps.push_repo_memory_default.outputs.validation_failed }} + steps: + - name: Checkout actions folder + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + repository: github/gh-aw + sparse-checkout: | + actions + persist-credentials: false + - name: Setup Scripts + uses: ./actions/setup + with: + destination: ${{ runner.temp }}/gh-aw/actions + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + sparse-checkout: . 
+ - name: Configure Git credentials + env: + REPO_NAME: ${{ github.repository }} + SERVER_URL: ${{ github.server_url }} + run: | + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git config --global user.name "github-actions[bot]" + git config --global am.keepcr true + # Re-authenticate git with GitHub token + SERVER_URL_STRIPPED="${SERVER_URL#https://}" + git remote set-url origin "https://x-access-token:${{ github.token }}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git" + echo "Git configured with standard GitHub Actions identity" + - name: Download repo-memory artifact (default) + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + continue-on-error: true + with: + name: repo-memory-default + path: /tmp/gh-aw/repo-memory/default + - name: Push repo-memory changes (default) + id: push_repo_memory_default + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_TOKEN: ${{ github.token }} + GITHUB_RUN_ID: ${{ github.run_id }} + GITHUB_SERVER_URL: ${{ github.server_url }} + ARTIFACT_DIR: /tmp/gh-aw/repo-memory/default + MEMORY_ID: default + TARGET_REPO: ${{ github.repository }} + BRANCH_NAME: memory/token-audit + MAX_FILE_SIZE: 102400 + MAX_FILE_COUNT: 100 + MAX_PATCH_SIZE: 10240 + ALLOWED_EXTENSIONS: '[]' + FILE_GLOB_FILTER: "memory/token-audit/*.json memory/token-audit/*.jsonl memory/token-audit/*.csv memory/token-audit/*.md" + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/push_repo_memory.cjs'); + await main(); + - name: Restore actions folder + if: always() + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + repository: github/gh-aw + sparse-checkout: | + actions/setup + sparse-checkout-cone-mode: true + persist-credentials: false + safe_outputs: needs: - agent @@ -1134,22 +1323,22 @@ jobs: runs-on: ubuntu-slim permissions: contents: write + discussions: write issues: write timeout-minutes: 15 env: - GH_AW_CALLER_WORKFLOW_ID: "${{ github.repository }}/copilot-token-usage-analyzer" + GH_AW_CALLER_WORKFLOW_ID: "${{ github.repository }}/copilot-token-audit" GH_AW_EFFECTIVE_TOKENS: ${{ needs.agent.outputs.effective_tokens }} GH_AW_ENGINE_ID: "copilot" GH_AW_ENGINE_MODEL: ${{ needs.agent.outputs.model }} - GH_AW_WORKFLOW_ID: "copilot-token-usage-analyzer" - GH_AW_WORKFLOW_NAME: "Copilot Token Usage Analyzer" + GH_AW_TRACKER_ID: "copilot-token-audit" + GH_AW_WORKFLOW_ID: "copilot-token-audit" + GH_AW_WORKFLOW_NAME: "Daily Copilot Token Usage Audit" outputs: code_push_failure_count: ${{ steps.process_safe_outputs.outputs.code_push_failure_count }} code_push_failure_errors: ${{ steps.process_safe_outputs.outputs.code_push_failure_errors }} create_discussion_error_count: ${{ steps.process_safe_outputs.outputs.create_discussion_error_count }} create_discussion_errors: ${{ steps.process_safe_outputs.outputs.create_discussion_errors }} - created_issue_number: ${{ steps.process_safe_outputs.outputs.created_issue_number }} - created_issue_url: ${{ steps.process_safe_outputs.outputs.created_issue_url }} process_safe_outputs_processed_count: ${{ steps.process_safe_outputs.outputs.processed_count }} process_safe_outputs_temporary_id_map: ${{ steps.process_safe_outputs.outputs.temporary_id_map }} steps: @@ -1195,7 +1384,7 @@ jobs: GH_AW_ALLOWED_DOMAINS: 
"*.pythonhosted.org,anaconda.org,api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,binstar.org,bootstrap.pypa.io,conda.anaconda.org,conda.binstar.org,crates.io,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,files.pythonhosted.org,github.com,host.docker.internal,index.crates.io,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,pip.pypa.io,ppa.launchpad.net,pypi.org,pypi.python.org,raw.githubusercontent.com,registry.npmjs.org,repo.anaconda.com,repo.continuum.io,s.symcb.com,s.symcd.com,security.ubuntu.com,static.crates.io,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" GITHUB_SERVER_URL: ${{ github.server_url }} GITHUB_API_URL: ${{ github.api_url }} - GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_issue\":{\"close_older_issues\":true,\"expires\":48,\"labels\":[\"automated-analysis\",\"token-usage\",\"copilot\"],\"max\":1,\"title_prefix\":\"๐Ÿ“Š Copilot Token Usage Report: \"},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"true\"},\"upload_asset\":{\"allowed-exts\":[\".png\",\".jpg\",\".jpeg\"],\"branch\":\"assets/${{ github.workflow }}\",\"max-size\":10240}}" + GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_discussion\":{\"category\":\"audits\",\"close_older_discussions\":true,\"expires\":72,\"fallback_to_issue\":true,\"max\":1,\"title_prefix\":\"[copilot-token-audit] \"},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"true\"},\"upload_asset\":{\"allowed-exts\":[\".png\",\".jpg\",\".jpeg\"],\"branch\":\"assets/${{ github.workflow }}\",\"max-size\":10240}}" with: github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} script: | @@ -1222,7 +1411,7 @@ jobs: permissions: contents: read env: - GH_AW_WORKFLOW_ID_SANITIZED: copilottokenusageanalyzer + GH_AW_WORKFLOW_ID_SANITIZED: copilottokenaudit steps: - name: Checkout actions folder uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -1330,7 +1519,8 @@ jobs: GH_AW_ASSETS_BRANCH: "assets/${{ github.workflow }}" GH_AW_ASSETS_MAX_SIZE_KB: 10240 GH_AW_ASSETS_ALLOWED_EXTS: ".png,.jpg,.jpeg" - GH_AW_WORKFLOW_NAME: "Copilot Token Usage Analyzer" + GH_AW_WORKFLOW_NAME: "Daily Copilot Token Usage Audit" + GH_AW_TRACKER_ID: "copilot-token-audit" GH_AW_ENGINE_ID: "copilot" GH_AW_ENGINE_MODEL: ${{ needs.agent.outputs.model }} with: diff --git a/.github/workflows/copilot-token-audit.md b/.github/workflows/copilot-token-audit.md new file mode 100644 index 00000000000..6bde5efc64d --- /dev/null +++ b/.github/workflows/copilot-token-audit.md @@ -0,0 +1,222 @@ +--- +description: Daily audit of Copilot token usage across all agentic workflows with historical trend tracking +on: + schedule: + - cron: "daily around 12:00 on weekdays" + workflow_dispatch: +permissions: + contents: read + actions: read + issues: read + pull-requests: read +tracker-id: copilot-token-audit +engine: copilot +tools: + agentic-workflows: + bash: + - "*" +steps: + - name: Install gh-aw CLI + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + 
run: | + if gh extension list | grep -q "github/gh-aw"; then + gh extension upgrade gh-aw || true + else + gh extension install github/gh-aw + fi + gh aw --version + - name: Download Copilot workflow logs + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + mkdir -p /tmp/gh-aw/token-audit + + # Download last 30 days of Copilot logs as JSON + gh aw logs \ + --engine copilot \ + --start-date -30d \ + --json \ + -c 500 \ + > /tmp/gh-aw/token-audit/copilot-logs.json + + TOTAL=$(jq '.runs | length' /tmp/gh-aw/token-audit/copilot-logs.json) + echo "โœ… Downloaded $TOTAL Copilot workflow runs (last 30 days)" +timeout-minutes: 25 +imports: + - uses: shared/daily-audit-discussion.md + with: + title-prefix: "[copilot-token-audit] " + - uses: shared/repo-memory-standard.md + with: + branch-name: "memory/token-audit" + description: "Historical daily Copilot token usage snapshots" + - copilot-setup-steps.yml + - shared/reporting.md + - shared/python-dataviz.md +features: + copilot-requests: true +--- +{{#runtime-import? .github/shared-instructions.md}} + +# Daily Copilot Token Usage Audit + +You are the Copilot Token Auditor โ€” a workflow that tracks daily token consumption across all Copilot-powered agentic workflows in this repository and maintains a historical record for trend analysis. + +## Mission + +1. Parse the pre-downloaded Copilot workflow logs and compute per-workflow token usage metrics. +2. Persist today's snapshot to repo-memory so the optimizer (and future runs of this audit) can read historical data. +3. Publish a concise audit discussion summarizing today's usage, trends, and cost highlights. + +## Data Sources + +### Pre-downloaded logs + +The workflow logs are at `/tmp/gh-aw/token-audit/copilot-logs.json`. The file is the raw JSON output of `gh aw logs --json` with this top-level shape: + +```json +{ + "summary": { "total_runs": N, "total_tokens": N, "total_cost": F, ... }, + "runs": [ ... ], + "tool_usage": [ ... ], + "mcp_tool_usage": { ... }, + ... +} +``` + +Each element of `.runs` is a `RunData` object with (among others): + +| Field | Type | Notes | +|---|---|---| +| `workflow_name` | string | Human-readable name | +| `workflow_path` | string | `.github/workflows/....lock.yml` | +| `token_usage` | int | Total tokens (`omitempty` โ€” treat missing/null as 0) | +| `effective_tokens` | int | Cost-normalized tokens | +| `estimated_cost` | float | USD cost (`omitempty` โ€” treat missing/null as 0) | +| `action_minutes` | float | Billable GitHub Actions minutes | +| `turns` | int | Number of agent turns | +| `duration` | string | Human-readable duration | +| `created_at` | ISO 8601 | Run creation time | +| `database_id` | int64 | Unique run ID | +| `url` | string | Link to the run | +| `status` | string | `completed`, `in_progress`, etc. | +| `conclusion` | string | `success`, `failure`, etc. | +| `error_count` | int | Errors encountered | +| `warning_count` | int | Warnings encountered | +| `token_usage_summary` | object or null | Firewall-level breakdown by model | + +### Repo-memory (historical snapshots) + +Previous snapshots live at `/tmp/gh-aw/repo-memory/default/`. Each daily snapshot is stored as a JSON file named `YYYY-MM-DD.json` with the schema below. + +## Phase 1 โ€” Process Logs + +Write a Python script to `/tmp/gh-aw/python/process_audit.py` and run it. The script must: + +1. Load `/tmp/gh-aw/token-audit/copilot-logs.json` and extract `.runs`. +2. Filter to `status == "completed"` runs only. +3. 
Group by `workflow_name` and compute per-workflow aggregates: + - `run_count`, `total_tokens`, `avg_tokens`, `total_cost`, `avg_cost`, `total_turns`, `avg_turns`, `total_action_minutes`, `error_count`, `warning_count` +4. Compute an overall summary: total runs, total tokens, total cost, total action minutes. +5. Sort workflows descending by `total_tokens`. +6. Save the result to `/tmp/gh-aw/python/data/audit_snapshot.json` with this shape: + +```json +{ + "date": "YYYY-MM-DD", + "period_days": 30, + "overall": { + "total_runs": N, + "total_tokens": N, + "total_cost": F, + "total_action_minutes": F + }, + "workflows": [ + { + "workflow_name": "...", + "run_count": N, + "total_tokens": N, + "avg_tokens": N, + "total_cost": F, + "avg_cost": F, + "total_turns": N, + "avg_turns": F, + "total_action_minutes": F, + "error_count": N, + "warning_count": N, + "latest_run_url": "..." + } + ] +} +``` + +Handle null/missing `token_usage` and `estimated_cost` by treating them as 0. + +## Phase 2 โ€” Persist Snapshot to Repo-Memory + +1. Read the snapshot from `/tmp/gh-aw/python/data/audit_snapshot.json`. +2. Copy it to `/tmp/gh-aw/repo-memory/default/YYYY-MM-DD.json` (today's UTC date). +3. This file is what the optimizer workflow reads to identify high-usage workflows. + +Also maintain a rolling summary file at `/tmp/gh-aw/repo-memory/default/rolling-summary.json` that contains an array of daily overall totals (date, total_tokens, total_cost, total_runs, total_action_minutes) for the last 90 entries. Load the existing file, append today's entry, trim to 90, and save. + +## Phase 3 โ€” Generate Charts + +Create a Python script to generate two charts: + +1. **Token usage by workflow** (horizontal bar chart): Top 15 workflows by total token usage. +2. **Historical trend** (line chart): Daily total tokens and cost from `rolling-summary.json` โ€” if available. If only 1 data point, skip this chart. + +Save charts to `/tmp/gh-aw/python/charts/`. Upload them as assets. + +## Phase 4 โ€” Publish Audit Discussion + +Create a discussion with these sections: + +### Report Template + +``` +### ๐Ÿ“Š Executive Summary + +- **Period**: last 30 days (YYYY-MM-DD to YYYY-MM-DD) +- **Total runs**: N +- **Total tokens**: N (formatted with commas) +- **Total cost**: $X.XX +- **Total Actions minutes**: X.X min +- **Active workflows**: N + +### ๐Ÿ† Top 5 Workflows by Token Usage + +| Workflow | Runs | Total Tokens | Avg Tokens | Total Cost | Avg Cost | +|---|---|---|---|---|---| +| ... | ... | ... | ... | ... | ... | + +### ๐Ÿ“ˆ Trends + +[Embed chart images here using uploaded asset URLs] + +If historical data is available, note week-over-week token and cost changes. + +
+<details>
+<summary>Full Per-Workflow Breakdown</summary>
+
+[Complete table of all workflows sorted by total tokens]
+
+</details>
+ +### ๐Ÿ’ก Observations + +- Identify any workflow with >30% of total tokens as a "heavy hitter" +- Note workflows with high error/warning counts relative to runs +- Flag any workflow whose avg tokens per run exceeds 100,000 + +**Data snapshot**: `memory/token-audit/YYYY-MM-DD.json` +``` + +## Important Notes + +- Use `// 0` (null coalescing) in jq and `.get(field, 0)` in Python for nullable numeric fields. +- Charts follow the python-dataviz shared component conventions (300 DPI, seaborn whitegrid, external data files only). +- Keep the discussion concise โ€” the optimizer workflow will do the deep analysis. diff --git a/.github/workflows/copilot-token-optimizer.lock.yml b/.github/workflows/copilot-token-optimizer.lock.yml index 93d252cd5d3..e0df68e3d54 100644 --- a/.github/workflows/copilot-token-optimizer.lock.yml +++ b/.github/workflows/copilot-token-optimizer.lock.yml @@ -20,18 +20,22 @@ # # For more information: https://github.github.com/gh-aw/introduction/overview/ # -# Analyzes the most expensive Copilot workflow identified by the token usage analyzer and creates an optimization issue with specific token-saving recommendations +# Daily optimizer that identifies a high-token-usage Copilot workflow, audits its runs, and recommends efficiency improvements # # Resolved workflow manifest: # Imports: +# - copilot-setup-steps.yml +# - shared/daily-audit-discussion.md +# - shared/repo-memory-standard.md # - shared/reporting.md -# - shared/token-logs-24h.md # -# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"cd74081c3c3ea4a91bba243719280f31f059f6049c199c3ae50cd45aa09a80b4","strict":true,"agent_id":"copilot"} +# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"6c0be3943af1605d9debf1acb7d9a8652afa2438ec55c1c3685d325da6fc34c8","strict":true,"agent_id":"copilot"} -name: "Copilot Token Optimizer" +name: "Copilot Token Usage Optimizer" "on": - # skip-if-match: is:issue is:open in:title "โšก Copilot Token Optimization:" # Skip-if-match processed as search check in pre-activation job + schedule: + - cron: "33 14 * * 1-5" + # Friendly format: daily around 14:00 on weekdays (scattered) workflow_dispatch: inputs: aw_context: @@ -39,29 +43,16 @@ name: "Copilot Token Optimizer" description: Agent caller context (used internally by Agentic Workflows). 
required: false type: string - workflow_run: - # zizmor: ignore[dangerous-triggers] - workflow_run trigger is secured with role and fork validation - branches: - - main - types: - - completed - workflows: - - Copilot Token Usage Analyzer permissions: {} concurrency: group: "gh-aw-${{ github.workflow }}" -run-name: "Copilot Token Optimizer" +run-name: "Copilot Token Usage Optimizer" jobs: activation: - needs: pre_activation - # zizmor: ignore[dangerous-triggers] - workflow_run trigger is secured with role and fork validation - if: > - (needs.pre_activation.outputs.activated == 'true') && (github.event_name != 'workflow_run' || github.event.workflow_run.repository.id == github.repository_id && - (!(github.event.workflow_run.repository.fork))) runs-on: ubuntu-slim permissions: contents: read @@ -90,7 +81,7 @@ jobs: GH_AW_INFO_MODEL: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || 'auto' }} GH_AW_INFO_VERSION: "latest" GH_AW_INFO_AGENT_VERSION: "latest" - GH_AW_INFO_WORKFLOW_NAME: "Copilot Token Optimizer" + GH_AW_INFO_WORKFLOW_NAME: "Copilot Token Usage Optimizer" GH_AW_INFO_EXPERIMENTAL: "false" GH_AW_INFO_SUPPORTS_TOOLS_ALLOWLIST: "true" GH_AW_INFO_STAGED: "false" @@ -132,7 +123,6 @@ jobs: env: GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt GH_AW_SAFE_OUTPUTS: ${{ runner.temp }}/gh-aw/safeoutputs/outputs.jsonl - GH_AW_ENV_WORKFLOW_NAME: ${{ env.WORKFLOW_NAME }} GH_AW_GITHUB_ACTOR: ${{ github.actor }} GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }} GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }} @@ -141,20 +131,23 @@ jobs: GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} + GH_AW_WIKI_NOTE: ${{ '' }} # poutine:ignore untrusted_checkout_exec run: | bash ${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh { - cat << 'GH_AW_PROMPT_9c56a6beaa447420_EOF' + cat << 'GH_AW_PROMPT_3016a484a9e1c1a1_EOF' - GH_AW_PROMPT_9c56a6beaa447420_EOF + GH_AW_PROMPT_3016a484a9e1c1a1_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md" cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md" cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md" + cat "${RUNNER_TEMP}/gh-aw/prompts/agentic_workflows_guide.md" + cat "${RUNNER_TEMP}/gh-aw/prompts/repo_memory_prompt.md" cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md" - cat << 'GH_AW_PROMPT_9c56a6beaa447420_EOF' + cat << 'GH_AW_PROMPT_3016a484a9e1c1a1_EOF' - Tools: create_issue, missing_tool, missing_data, noop + Tools: create_discussion, missing_tool, missing_data, noop The following GitHub context information is available for this workflow: @@ -184,21 +177,18 @@ jobs: {{/if}} - GH_AW_PROMPT_9c56a6beaa447420_EOF + GH_AW_PROMPT_3016a484a9e1c1a1_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md" - cat << 'GH_AW_PROMPT_9c56a6beaa447420_EOF' + cat << 'GH_AW_PROMPT_3016a484a9e1c1a1_EOF' - {{#runtime-import .github/workflows/shared/token-logs-24h.md}} {{#runtime-import .github/workflows/shared/reporting.md}} {{#runtime-import .github/workflows/copilot-token-optimizer.md}} - GH_AW_PROMPT_9c56a6beaa447420_EOF + GH_AW_PROMPT_3016a484a9e1c1a1_EOF } > "$GH_AW_PROMPT" - name: Interpolate variables and render templates uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 env: GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt - GH_AW_ENV_WORKFLOW_NAME: ${{ env.WORKFLOW_NAME }} - GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} with: script: | const { setupGlobals } = require('${{ runner.temp 
}}/gh-aw/actions/setup_globals.cjs'); @@ -209,7 +199,6 @@ jobs: uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 env: GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt - GH_AW_ENV_WORKFLOW_NAME: ${{ env.WORKFLOW_NAME }} GH_AW_GITHUB_ACTOR: ${{ github.actor }} GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }} GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }} @@ -218,7 +207,12 @@ jobs: GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} - GH_AW_NEEDS_PRE_ACTIVATION_OUTPUTS_ACTIVATED: ${{ needs.pre_activation.outputs.activated }} + GH_AW_MEMORY_BRANCH_NAME: 'memory/token-audit' + GH_AW_MEMORY_CONSTRAINTS: "\n\n**Constraints:**\n- **Allowed Files**: Only files matching patterns: memory/token-audit/*.json, memory/token-audit/*.jsonl, memory/token-audit/*.csv, memory/token-audit/*.md\n- **Max File Size**: 102400 bytes (0.10 MB) per file\n- **Max File Count**: 100 files per commit\n- **Max Patch Size**: 10240 bytes (10 KB) total per push (max: 100 KB)\n" + GH_AW_MEMORY_DESCRIPTION: ' Historical daily Copilot token usage snapshots (shared with copilot-token-audit)' + GH_AW_MEMORY_DIR: '/tmp/gh-aw/repo-memory/default/' + GH_AW_MEMORY_TARGET_REPO: ' of the current repository' + GH_AW_WIKI_NOTE: '' with: script: | const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); @@ -230,7 +224,6 @@ jobs: return await substitutePlaceholders({ file: process.env.GH_AW_PROMPT, substitutions: { - GH_AW_ENV_WORKFLOW_NAME: process.env.GH_AW_ENV_WORKFLOW_NAME, GH_AW_GITHUB_ACTOR: process.env.GH_AW_GITHUB_ACTOR, GH_AW_GITHUB_EVENT_COMMENT_ID: process.env.GH_AW_GITHUB_EVENT_COMMENT_ID, GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: process.env.GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER, @@ -239,7 +232,12 @@ jobs: GH_AW_GITHUB_REPOSITORY: process.env.GH_AW_GITHUB_REPOSITORY, GH_AW_GITHUB_RUN_ID: process.env.GH_AW_GITHUB_RUN_ID, GH_AW_GITHUB_WORKSPACE: process.env.GH_AW_GITHUB_WORKSPACE, - GH_AW_NEEDS_PRE_ACTIVATION_OUTPUTS_ACTIVATED: process.env.GH_AW_NEEDS_PRE_ACTIVATION_OUTPUTS_ACTIVATED + GH_AW_MEMORY_BRANCH_NAME: process.env.GH_AW_MEMORY_BRANCH_NAME, + GH_AW_MEMORY_CONSTRAINTS: process.env.GH_AW_MEMORY_CONSTRAINTS, + GH_AW_MEMORY_DESCRIPTION: process.env.GH_AW_MEMORY_DESCRIPTION, + GH_AW_MEMORY_DIR: process.env.GH_AW_MEMORY_DIR, + GH_AW_MEMORY_TARGET_REPO: process.env.GH_AW_MEMORY_TARGET_REPO, + GH_AW_WIKI_NOTE: process.env.GH_AW_WIKI_NOTE } }); - name: Validate prompt placeholders @@ -306,35 +304,67 @@ jobs: echo "GH_AW_SAFE_OUTPUTS=${RUNNER_TEMP}/gh-aw/safeoutputs/outputs.jsonl" >> "$GITHUB_OUTPUT" echo "GH_AW_SAFE_OUTPUTS_CONFIG_PATH=${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" >> "$GITHUB_OUTPUT" echo "GH_AW_SAFE_OUTPUTS_TOOLS_PATH=${RUNNER_TEMP}/gh-aw/safeoutputs/tools.json" >> "$GITHUB_OUTPUT" - - name: Checkout repository - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - name: Create gh-aw temp directory run: bash ${RUNNER_TEMP}/gh-aw/actions/create_gh_aw_tmp_dir.sh - name: Configure gh CLI for GitHub Enterprise run: bash ${RUNNER_TEMP}/gh-aw/actions/configure_gh_for_ghe.sh env: GH_TOKEN: ${{ github.token }} + - name: Checkout code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Setup Go + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 + with: + go-version: '1.25' + cache: false + - name: Capture GOROOT for AWF chroot mode 
+ run: echo "GOROOT=$(go env GOROOT)" >> "$GITHUB_ENV" + - name: Setup Node.js + uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0 + with: + node-version: '24' + cache: 'npm' + cache-dependency-path: 'actions/setup/js/package-lock.json' + package-manager-cache: false + - name: Install gh-aw extension + run: curl -fsSL https://raw.githubusercontent.com/github/gh-aw/refs/heads/main/install-gh-aw.sh | bash + - name: Install npm dependencies + run: cd actions/setup/js && npm ci + - name: Install development dependencies + run: make deps-dev + - name: Build code + run: make build - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Restore 24h token logs from cache - run: "set -euo pipefail\nTOKEN_LOGS_DIR=\"/tmp/gh-aw/token-logs\"\nmkdir -p \"$TOKEN_LOGS_DIR\"\nTODAY=$(date -u +%Y-%m-%d)\n\n# Look for today's pre-fetched data from the Token Logs Fetch workflow\nFETCH_RUN_ID=$(gh run list \\\n --workflow \"token-logs-fetch.lock.yml\" \\\n --status success \\\n --limit 1 \\\n --json databaseId \\\n --jq '.[0].databaseId' 2>/dev/null || echo \"\")\n\nUSED_CACHE=false\nif [ -n \"$FETCH_RUN_ID\" ]; then\n CACHE_TMP=\"/tmp/gh-aw/token-logs-fetch-cache\"\n mkdir -p \"$CACHE_TMP\"\n gh run download \"$FETCH_RUN_ID\" \\\n --repo \"$GITHUB_REPOSITORY\" \\\n --name \"cache-memory\" \\\n --dir \"$CACHE_TMP\" \\\n 2>/dev/null || true\n CACHE_DATE=$(cat \"$CACHE_TMP/token-logs/fetch-date.txt\" 2>/dev/null || echo \"\")\n if [ \"$CACHE_DATE\" = \"$TODAY\" ] && \\\n [ -s \"$CACHE_TMP/token-logs/copilot-runs.json\" ] && \\\n [ -s \"$CACHE_TMP/token-logs/claude-runs.json\" ]; then\n echo \"โœ… Using pre-fetched logs from Token Logs Fetch run $FETCH_RUN_ID (date: $CACHE_DATE)\"\n cp \"$CACHE_TMP/token-logs/copilot-runs.json\" \"$TOKEN_LOGS_DIR/copilot-runs.json\"\n cp \"$CACHE_TMP/token-logs/claude-runs.json\" \"$TOKEN_LOGS_DIR/claude-runs.json\"\n USED_CACHE=true\n else\n echo \"โ„น๏ธ No valid cached logs found (cache date: ${CACHE_DATE:-none}, today: $TODAY)\"\n fi\nfi\n\nif [ \"$USED_CACHE\" != \"true\" ]; then\n echo \"๐Ÿ“ฅ Downloading Copilot and Claude workflow runs from last 24 hours...\"\n\n # Ensure gh-aw CLI is installed โ€” this shared step runs before user-defined steps.\n # Install failure is non-fatal to match the fallback-safe behavior of gh aw logs below.\n GH_AW_AVAILABLE=false\n if gh extension list 2>/dev/null | grep -q \"github/gh-aw\"; then\n GH_AW_AVAILABLE=true\n else\n echo \"๐Ÿ“ฆ Installing gh-aw CLI extension...\"\n if gh extension install github/gh-aw 2>/dev/null; then\n GH_AW_AVAILABLE=true\n else\n echo \"โš ๏ธ Failed to install gh-aw CLI extension; continuing with empty token logs.\"\n fi\n fi\n\n # Check GitHub API rate limit before downloading logs\n RATE_INFO=$(gh api rate_limit --jq '.rate | \"\\(.remaining)/\\(.limit) (resets \\(.reset | todate))\"' 2>/dev/null || echo \"unknown\")\n echo \"๐Ÿ“Š GitHub API rate limit before download: $RATE_INFO\"\n\n if [ \"$GH_AW_AVAILABLE\" = \"true\" ]; then\n LOGS_STDERR=\"/tmp/token-logs-copilot-stderr.log\"\n gh aw logs \\\n --engine copilot \\\n --start-date -1d \\\n --json \\\n -c 300 \\\n > /tmp/token-logs-copilot-raw.json 2>\"$LOGS_STDERR\"\n COPILOT_EXIT=$?\n if [ \"$COPILOT_EXIT\" -ne 0 ]; then\n echo \"โš ๏ธ gh aw logs --engine copilot exited with code $COPILOT_EXIT\"\n cat \"$LOGS_STDERR\" | tail -5\n echo '{\"runs\":[]}' > /tmp/token-logs-copilot-raw.json\n fi\n else\n echo '{\"runs\":[]}' > /tmp/token-logs-copilot-raw.json\n fi\n jq '.runs // []' /tmp/token-logs-copilot-raw.json > 
\"$TOKEN_LOGS_DIR/copilot-runs.json\" 2>/dev/null || echo \"[]\" > \"$TOKEN_LOGS_DIR/copilot-runs.json\"\n\n if [ \"$GH_AW_AVAILABLE\" = \"true\" ]; then\n LOGS_STDERR=\"/tmp/token-logs-claude-stderr.log\"\n gh aw logs \\\n --engine claude \\\n --start-date -1d \\\n --json \\\n -c 300 \\\n > /tmp/token-logs-claude-raw.json 2>\"$LOGS_STDERR\"\n CLAUDE_EXIT=$?\n if [ \"$CLAUDE_EXIT\" -ne 0 ]; then\n echo \"โš ๏ธ gh aw logs --engine claude exited with code $CLAUDE_EXIT\"\n cat \"$LOGS_STDERR\" | tail -5\n echo '{\"runs\":[]}' > /tmp/token-logs-claude-raw.json\n fi\n else\n echo '{\"runs\":[]}' > /tmp/token-logs-claude-raw.json\n fi\n jq '.runs // []' /tmp/token-logs-claude-raw.json > \"$TOKEN_LOGS_DIR/claude-runs.json\" 2>/dev/null || echo \"[]\" > \"$TOKEN_LOGS_DIR/claude-runs.json\"\n\n # Check rate limit after downloads to see consumption\n RATE_INFO=$(gh api rate_limit --jq '.rate | \"\\(.remaining)/\\(.limit) (resets \\(.reset | todate))\"' 2>/dev/null || echo \"unknown\")\n echo \"๐Ÿ“Š GitHub API rate limit after download: $RATE_INFO\"\nfi\n\necho \"โœ… Copilot runs: $(jq 'length' \"$TOKEN_LOGS_DIR/copilot-runs.json\")\"\necho \"โœ… Claude runs: $(jq 'length' \"$TOKEN_LOGS_DIR/claude-runs.json\")\"" + GH_TOKEN: ${{ github.token }} + name: Recompile workflows + run: make recompile || true + - name: Setup uv + uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # eac588ad8def6316056a12d4907a9d4d84ff7a3b + - name: Install Go language server (gopls) + run: go install golang.org/x/tools/gopls@latest + - name: Install TypeScript language server + run: npm install -g typescript-language-server typescript - env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Install gh-aw CLI - run: | + run: |- if gh extension list | grep -q "github/gh-aw"; then gh extension upgrade gh-aw || true else gh extension install github/gh-aw fi gh aw --version - - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Find and download artifacts from the most expensive Copilot workflow - run: "set -euo pipefail\nmkdir -p /tmp/token-optimizer\n\n# Use pre-fetched logs from the shared token-logs-24h pre-step\ncp /tmp/gh-aw/token-logs/copilot-runs.json /tmp/token-optimizer/copilot-runs.json 2>/dev/null || echo \"[]\" > /tmp/token-optimizer/copilot-runs.json\n\nRUN_COUNT=$(jq 'length' /tmp/token-optimizer/copilot-runs.json 2>/dev/null || echo 0)\necho \"Found ${RUN_COUNT} Copilot runs\"\n\nif [ \"$RUN_COUNT\" -eq 0 ]; then\n echo \"No Copilot runs found, nothing to optimize\"\n exit 0\nfi\n\n# Find the most expensive workflow (by total tokens across all its runs)\n# Schema: gh aw logs --json โ†’ LogsData.runs[] (RunData from pkg/cli/logs_report.go)\n# .workflow_name (string), .token_usage (int, omitempty โ†’ null when 0),\n# .estimated_cost (float, omitempty), .database_id (int64), .created_at (time), .url (string)\necho \"๐Ÿ” Identifying most expensive workflow...\"\njq -r '\n sort_by(.workflow_name) |\n group_by(.workflow_name) |\n map({\n workflow: .[0].workflow_name,\n total_tokens: (map(.token_usage // 0) | add),\n total_cost: (map(.estimated_cost // 0) | add),\n run_count: length,\n avg_tokens: ((map(.token_usage // 0) | add) / length),\n run_ids: map(.database_id),\n latest_run_id: (sort_by(.created_at) | last | .database_id),\n latest_run_url: (sort_by(.created_at) | last | .url)\n }) |\n sort_by(.total_tokens) | reverse | .[0]\n' /tmp/token-optimizer/copilot-runs.json > /tmp/token-optimizer/top-workflow.json\n\nWORKFLOW_NAME=$(jq -r '.workflow' 
/tmp/token-optimizer/top-workflow.json)\nLATEST_RUN_ID=$(jq -r '.latest_run_id' /tmp/token-optimizer/top-workflow.json)\necho \"Most expensive workflow: $WORKFLOW_NAME (run: $LATEST_RUN_ID)\"\necho \"WORKFLOW_NAME=$WORKFLOW_NAME\" >> \"$GITHUB_ENV\"\n\n# Download the firewall-audit-logs artifact from the latest run of that workflow\nARTIFACT_DIR=\"/tmp/token-optimizer/artifacts\"\nmkdir -p \"$ARTIFACT_DIR\"\n\necho \"๐Ÿ“ฅ Downloading firewall-audit-logs from run $LATEST_RUN_ID...\"\ngh run download \"$LATEST_RUN_ID\" \\\n --repo \"$GITHUB_REPOSITORY\" \\\n --name \"firewall-audit-logs\" \\\n --dir \"$ARTIFACT_DIR\" \\\n 2>/dev/null || true\n\n# Also download agent artifacts (contains prompt and tool usage logs)\necho \"๐Ÿ“ฅ Downloading agent artifacts from run $LATEST_RUN_ID...\"\ngh run download \"$LATEST_RUN_ID\" \\\n --repo \"$GITHUB_REPOSITORY\" \\\n --name \"agent\" \\\n --dir \"$ARTIFACT_DIR/agent\" \\\n 2>/dev/null || true\n\n# Find token-usage.jsonl\nUSAGE_FILE=$(find \"$ARTIFACT_DIR\" -name \"token-usage.jsonl\" 2>/dev/null | head -1)\nif [ -n \"$USAGE_FILE\" ]; then\n echo \"Found token-usage.jsonl: $USAGE_FILE\"\n cp \"$USAGE_FILE\" /tmp/token-optimizer/token-usage.jsonl\n wc -l < /tmp/token-optimizer/token-usage.jsonl\nelse\n echo \"No token-usage.jsonl found in artifacts\"\n touch /tmp/token-optimizer/token-usage.jsonl\nfi\n\n# Find the workflow markdown source\nWORKFLOW_MD_NAME=$(echo \"$WORKFLOW_NAME\" | tr '[:upper:]' '[:lower:]' | tr ' ' '-')\nWORKFLOW_MD=\".github/workflows/${WORKFLOW_MD_NAME}.md\"\nif [ -f \"$WORKFLOW_MD\" ]; then\n echo \"Found workflow source: $WORKFLOW_MD\"\n cp \"$WORKFLOW_MD\" /tmp/token-optimizer/workflow-source.md\nelse\n echo \"Workflow source not found at $WORKFLOW_MD, searching...\"\n FOUND_MD=$(find .github/workflows -name \"*.md\" -exec grep -l \"^name: $WORKFLOW_NAME\" {} \\; 2>/dev/null | head -1 || true)\n if [ -n \"$FOUND_MD\" ]; then\n echo \"Found: $FOUND_MD\"\n cp \"$FOUND_MD\" /tmp/token-optimizer/workflow-source.md\n fi\nfi\n\n# Extract declared tools from workflow source (if available)\nif [ -f /tmp/token-optimizer/workflow-source.md ]; then\n echo \"๐Ÿ“‹ Extracting declared tools from workflow source...\"\n # Extract tools section from frontmatter\n sed -n '/^---$/,/^---$/p' /tmp/token-optimizer/workflow-source.md | \\\n grep -A20 \"^tools:\" | head -30 > /tmp/token-optimizer/declared-tools.txt || true\n cat /tmp/token-optimizer/declared-tools.txt\nfi\n" + # Repo memory git-based storage configuration from frontmatter processed below + - name: Clone repo-memory branch (default) + env: + GH_TOKEN: ${{ github.token }} + GITHUB_SERVER_URL: ${{ github.server_url }} + BRANCH_NAME: memory/token-audit + TARGET_REPO: ${{ github.repository }} + MEMORY_DIR: /tmp/gh-aw/repo-memory/default + CREATE_ORPHAN: true + run: bash ${RUNNER_TEMP}/gh-aw/actions/clone_repo_memory_branch.sh - name: Configure Git credentials env: REPO_NAME: ${{ github.repository }} @@ -379,28 +409,52 @@ jobs: await determineAutomaticLockdown(github, context, core); - name: Download container images run: bash ${RUNNER_TEMP}/gh-aw/actions/download_docker_images.sh ghcr.io/github/gh-aw-firewall/agent:0.25.13 ghcr.io/github/gh-aw-firewall/api-proxy:0.25.13 ghcr.io/github/gh-aw-firewall/squid:0.25.13 ghcr.io/github/gh-aw-mcpg:v0.2.12 ghcr.io/github/github-mcp-server:v0.32.0 node:lts-alpine + - name: Install gh-aw extension + env: + GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + run: | + # Check if gh-aw extension 
is already installed + if gh extension list | grep -q "github/gh-aw"; then + echo "gh-aw extension already installed, upgrading..." + gh extension upgrade gh-aw || true + else + echo "Installing gh-aw extension..." + gh extension install github/gh-aw + fi + gh aw --version + # Copy the gh-aw binary to ${RUNNER_TEMP}/gh-aw for MCP server containerization + mkdir -p ${RUNNER_TEMP}/gh-aw + GH_AW_BIN=$(which gh-aw 2>/dev/null || find ~/.local/share/gh/extensions/gh-aw -name 'gh-aw' -type f 2>/dev/null | head -1) + if [ -n "$GH_AW_BIN" ] && [ -f "$GH_AW_BIN" ]; then + cp "$GH_AW_BIN" ${RUNNER_TEMP}/gh-aw/gh-aw + chmod +x ${RUNNER_TEMP}/gh-aw/gh-aw + echo "Copied gh-aw binary to ${RUNNER_TEMP}/gh-aw/gh-aw" + else + echo "::error::Failed to find gh-aw binary for MCP server" + exit 1 + fi - name: Write Safe Outputs Config run: | mkdir -p ${RUNNER_TEMP}/gh-aw/safeoutputs mkdir -p /tmp/gh-aw/safeoutputs mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs - cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_a7ada65c847dda94_EOF' - {"create_issue":{"close_older_issues":true,"expires":168,"labels":["automated-analysis","token-optimization","copilot","cost-reduction"],"max":1,"title_prefix":"โšก Copilot Token Optimization: "},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"true"}} - GH_AW_SAFE_OUTPUTS_CONFIG_a7ada65c847dda94_EOF + cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_74130b46a97c194e_EOF' + {"create_discussion":{"category":"audits","close_older_discussions":true,"expires":168,"fallback_to_issue":true,"max":1,"title_prefix":"[copilot-token-optimizer] "},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"true"},"push_repo_memory":{"memories":[{"dir":"/tmp/gh-aw/repo-memory/default","id":"default","max_file_count":100,"max_file_size":102400,"max_patch_size":10240}]}} + GH_AW_SAFE_OUTPUTS_CONFIG_74130b46a97c194e_EOF - name: Write Safe Outputs Tools run: | - cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/tools_meta.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_META_242d07d61e9c5df2_EOF' + cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/tools_meta.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_META_0a66531de00e3046_EOF' { "description_suffixes": { - "create_issue": " CONSTRAINTS: Maximum 1 issue(s) can be created. Title will be prefixed with \"โšก Copilot Token Optimization: \". Labels [\"automated-analysis\" \"token-optimization\" \"copilot\" \"cost-reduction\"] will be automatically added." + "create_discussion": " CONSTRAINTS: Maximum 1 discussion(s) can be created. Title will be prefixed with \"[copilot-token-optimizer] \". Discussions will be created in category \"audits\"." 
}, "repo_params": {}, "dynamic_tools": [] } - GH_AW_SAFE_OUTPUTS_TOOLS_META_242d07d61e9c5df2_EOF - cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_1e3371b97c635946_EOF' + GH_AW_SAFE_OUTPUTS_TOOLS_META_0a66531de00e3046_EOF + cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_9961baf6f509ded5_EOF' { - "create_issue": { + "create_discussion": { "defaultMax": 1, "fields": { "body": { @@ -409,22 +463,15 @@ jobs: "sanitize": true, "maxLength": 65000 }, - "labels": { - "type": "array", - "itemType": "string", - "itemSanitize": true, - "itemMaxLength": 128 - }, - "parent": { - "issueOrPRNumber": true + "category": { + "type": "string", + "sanitize": true, + "maxLength": 128 }, "repo": { "type": "string", "maxLength": 256 }, - "temporary_id": { - "type": "string" - }, "title": { "required": true, "type": "string", @@ -491,7 +538,7 @@ jobs: } } } - GH_AW_SAFE_OUTPUTS_VALIDATION_1e3371b97c635946_EOF + GH_AW_SAFE_OUTPUTS_VALIDATION_9961baf6f509ded5_EOF node ${RUNNER_TEMP}/gh-aw/actions/generate_safe_outputs_tools.cjs - name: Generate Safe Outputs MCP Server Config id: safe-outputs-config @@ -542,6 +589,7 @@ jobs: GITHUB_MCP_GUARD_MIN_INTEGRITY: ${{ steps.determine-automatic-lockdown.outputs.min_integrity }} GITHUB_MCP_GUARD_REPOS: ${{ steps.determine-automatic-lockdown.outputs.repos }} GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | set -eo pipefail mkdir -p /tmp/gh-aw/mcp-config @@ -561,9 +609,28 @@ jobs: export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.12' mkdir -p /home/runner/.copilot - cat << GH_AW_MCP_CONFIG_366ff52311e76854_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh + cat << GH_AW_MCP_CONFIG_2153288600fbf2ef_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh { "mcpServers": { + "agenticworkflows": { + "type": "stdio", + "container": "localhost/gh-aw:dev", + "mounts": ["\${GITHUB_WORKSPACE}:\${GITHUB_WORKSPACE}:rw", "/tmp/gh-aw:/tmp/gh-aw:rw"], + "args": ["--network", "host", "-w", "\${GITHUB_WORKSPACE}"], + "env": { + "DEBUG": "*", + "GITHUB_TOKEN": "\${GITHUB_TOKEN}", + "GITHUB_ACTOR": "\${GITHUB_ACTOR}", + "GITHUB_REPOSITORY": "\${GITHUB_REPOSITORY}" + }, + "guard-policies": { + "write-sink": { 
+ "accept": [ + "*" + ] + } + } + }, "github": { "type": "stdio", "container": "ghcr.io/github/github-mcp-server:v0.32.0", @@ -571,7 +638,7 @@ jobs: "GITHUB_HOST": "\${GITHUB_SERVER_URL}", "GITHUB_PERSONAL_ACCESS_TOKEN": "\${GITHUB_MCP_SERVER_TOKEN}", "GITHUB_READ_ONLY": "1", - "GITHUB_TOOLSETS": "context,repos,issues,pull_requests,actions" + "GITHUB_TOOLSETS": "context,repos,issues,pull_requests" }, "guard-policies": { "allow-only": { @@ -602,7 +669,7 @@ jobs: "payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}" } } - GH_AW_MCP_CONFIG_366ff52311e76854_EOF + GH_AW_MCP_CONFIG_2153288600fbf2ef_EOF - name: Download activation artifact uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: @@ -765,6 +832,15 @@ jobs: if [ ! -f /tmp/gh-aw/agent_output.json ]; then echo '{"items":[]}' > /tmp/gh-aw/agent_output.json fi + # Upload repo memory as artifacts for push job + - name: Upload repo-memory artifact (default) + if: always() + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + with: + name: repo-memory-default + path: /tmp/gh-aw/repo-memory/default + retention-days: 1 + if-no-files-found: ignore - name: Upload agent artifacts if: always() continue-on-error: true @@ -800,11 +876,13 @@ jobs: - activation - agent - detection + - push_repo_memory - safe_outputs if: always() && (needs.agent.result != 'skipped' || needs.activation.outputs.lockdown_check_failed == 'true') runs-on: ubuntu-slim permissions: contents: read + discussions: write issues: write concurrency: group: "gh-aw-conclusion-copilot-token-optimizer" @@ -845,7 +923,8 @@ jobs: env: GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} GH_AW_NOOP_MAX: "1" - GH_AW_WORKFLOW_NAME: "Copilot Token Optimizer" + GH_AW_WORKFLOW_NAME: "Copilot Token Usage Optimizer" + GH_AW_TRACKER_ID: "copilot-token-optimizer" GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} GH_AW_NOOP_REPORT_AS_ISSUE: "true" @@ -862,7 +941,8 @@ jobs: env: GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} GH_AW_MISSING_TOOL_CREATE_ISSUE: "true" - GH_AW_WORKFLOW_NAME: "Copilot Token Optimizer" + GH_AW_WORKFLOW_NAME: "Copilot Token Usage Optimizer" + GH_AW_TRACKER_ID: "copilot-token-optimizer" with: github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} script: | @@ -876,14 +956,21 @@ jobs: uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 env: GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} - GH_AW_WORKFLOW_NAME: "Copilot Token Optimizer" + GH_AW_WORKFLOW_NAME: "Copilot Token Usage Optimizer" + GH_AW_TRACKER_ID: "copilot-token-optimizer" GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} GH_AW_WORKFLOW_ID: "copilot-token-optimizer" GH_AW_ENGINE_ID: "copilot" GH_AW_CHECKOUT_PR_SUCCESS: ${{ needs.agent.outputs.checkout_pr_success }} GH_AW_INFERENCE_ACCESS_ERROR: ${{ needs.agent.outputs.inference_access_error }} + GH_AW_CREATE_DISCUSSION_ERRORS: ${{ needs.safe_outputs.outputs.create_discussion_errors }} + GH_AW_CREATE_DISCUSSION_ERROR_COUNT: ${{ needs.safe_outputs.outputs.create_discussion_error_count }} GH_AW_LOCKDOWN_CHECK_FAILED: ${{ needs.activation.outputs.lockdown_check_failed }} + GH_AW_PUSH_REPO_MEMORY_RESULT: ${{ needs.push_repo_memory.result }} + GH_AW_REPO_MEMORY_VALIDATION_FAILED_default: 
${{ needs.push_repo_memory.outputs.validation_failed_default }} + GH_AW_REPO_MEMORY_VALIDATION_ERROR_default: ${{ needs.push_repo_memory.outputs.validation_error_default }} + GH_AW_REPO_MEMORY_PATCH_SIZE_EXCEEDED_default: ${{ needs.push_repo_memory.outputs.patch_size_exceeded_default }} GH_AW_GROUP_REPORTS: "false" GH_AW_FAILURE_REPORT_AS_ISSUE: "true" GH_AW_TIMEOUT_MINUTES: "30" @@ -978,8 +1065,8 @@ jobs: if: always() && steps.detection_guard.outputs.run_detection == 'true' uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 env: - WORKFLOW_NAME: "Copilot Token Optimizer" - WORKFLOW_DESCRIPTION: "Analyzes the most expensive Copilot workflow identified by the token usage analyzer and creates an optimization issue with specific token-saving recommendations" + WORKFLOW_NAME: "Copilot Token Usage Optimizer" + WORKFLOW_DESCRIPTION: "Daily optimizer that identifies a high-token-usage Copilot workflow, audits its runs, and recommends efficiency improvements" HAS_PATCH: ${{ needs.agent.outputs.has_patch }} with: script: | @@ -1049,13 +1136,23 @@ jobs: const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_threat_detection_results.cjs'); await main(); - pre_activation: + push_repo_memory: + needs: + - agent + - detection + if: > + always() && (needs.detection.result == 'success' || needs.detection.result == 'skipped') && + needs.agent.result == 'success' runs-on: ubuntu-slim permissions: - contents: read + contents: write + concurrency: + group: "push-repo-memory-${{ github.repository }}|memory/token-audit" + cancel-in-progress: false outputs: - activated: ${{ steps.check_membership.outputs.is_team_member == 'true' && steps.check_skip_if_match.outputs.skip_check_ok == 'true' }} - matched_command: '' + patch_size_exceeded_default: ${{ steps.push_repo_memory_default.outputs.patch_size_exceeded }} + validation_error_default: ${{ steps.push_repo_memory_default.outputs.validation_error }} + validation_failed_default: ${{ steps.push_repo_memory_default.outputs.validation_failed }} steps: - name: Checkout actions folder uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -1068,31 +1165,61 @@ jobs: uses: ./actions/setup with: destination: ${{ runner.temp }}/gh-aw/actions - - name: Check team membership for workflow - id: check_membership - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + sparse-checkout: . 
+ - name: Configure Git credentials env: - GH_AW_REQUIRED_ROLES: "admin,maintainer,write" + REPO_NAME: ${{ github.repository }} + SERVER_URL: ${{ github.server_url }} + run: | + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git config --global user.name "github-actions[bot]" + git config --global am.keepcr true + # Re-authenticate git with GitHub token + SERVER_URL_STRIPPED="${SERVER_URL#https://}" + git remote set-url origin "https://x-access-token:${{ github.token }}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git" + echo "Git configured with standard GitHub Actions identity" + - name: Download repo-memory artifact (default) + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + continue-on-error: true with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io); - const { main } = require('${{ runner.temp }}/gh-aw/actions/check_membership.cjs'); - await main(); - - name: Check skip-if-match query - id: check_skip_if_match + name: repo-memory-default + path: /tmp/gh-aw/repo-memory/default + - name: Push repo-memory changes (default) + id: push_repo_memory_default + if: always() uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 env: - GH_AW_SKIP_QUERY: "is:issue is:open in:title \"โšก Copilot Token Optimization:\"" - GH_AW_WORKFLOW_NAME: "Copilot Token Optimizer" - GH_AW_SKIP_MAX_MATCHES: "1" + GH_TOKEN: ${{ github.token }} + GITHUB_RUN_ID: ${{ github.run_id }} + GITHUB_SERVER_URL: ${{ github.server_url }} + ARTIFACT_DIR: /tmp/gh-aw/repo-memory/default + MEMORY_ID: default + TARGET_REPO: ${{ github.repository }} + BRANCH_NAME: memory/token-audit + MAX_FILE_SIZE: 102400 + MAX_FILE_COUNT: 100 + MAX_PATCH_SIZE: 10240 + ALLOWED_EXTENSIONS: '[]' + FILE_GLOB_FILTER: "memory/token-audit/*.json memory/token-audit/*.jsonl memory/token-audit/*.csv memory/token-audit/*.md" with: script: | const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); setupGlobals(core, github, context, exec, io); - const { main } = require('${{ runner.temp }}/gh-aw/actions/check_skip_if_match.cjs'); + const { main } = require('${{ runner.temp }}/gh-aw/actions/push_repo_memory.cjs'); await main(); + - name: Restore actions folder + if: always() + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + repository: github/gh-aw + sparse-checkout: | + actions/setup + sparse-checkout-cone-mode: true + persist-credentials: false safe_outputs: needs: @@ -1102,6 +1229,7 @@ jobs: runs-on: ubuntu-slim permissions: contents: read + discussions: write issues: write timeout-minutes: 15 env: @@ -1109,15 +1237,14 @@ jobs: GH_AW_EFFECTIVE_TOKENS: ${{ needs.agent.outputs.effective_tokens }} GH_AW_ENGINE_ID: "copilot" GH_AW_ENGINE_MODEL: ${{ needs.agent.outputs.model }} + GH_AW_TRACKER_ID: "copilot-token-optimizer" GH_AW_WORKFLOW_ID: "copilot-token-optimizer" - GH_AW_WORKFLOW_NAME: "Copilot Token Optimizer" + GH_AW_WORKFLOW_NAME: "Copilot Token Usage Optimizer" outputs: code_push_failure_count: ${{ steps.process_safe_outputs.outputs.code_push_failure_count }} code_push_failure_errors: ${{ steps.process_safe_outputs.outputs.code_push_failure_errors }} create_discussion_error_count: ${{ steps.process_safe_outputs.outputs.create_discussion_error_count }} create_discussion_errors: ${{ steps.process_safe_outputs.outputs.create_discussion_errors }} - created_issue_number: 
${{ steps.process_safe_outputs.outputs.created_issue_number }} - created_issue_url: ${{ steps.process_safe_outputs.outputs.created_issue_url }} process_safe_outputs_processed_count: ${{ steps.process_safe_outputs.outputs.processed_count }} process_safe_outputs_temporary_id_map: ${{ steps.process_safe_outputs.outputs.temporary_id_map }} steps: @@ -1163,7 +1290,7 @@ jobs: GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" GITHUB_SERVER_URL: ${{ github.server_url }} GITHUB_API_URL: ${{ github.api_url }} - GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_issue\":{\"close_older_issues\":true,\"expires\":168,\"labels\":[\"automated-analysis\",\"token-optimization\",\"copilot\",\"cost-reduction\"],\"max\":1,\"title_prefix\":\"โšก Copilot Token Optimization: \"},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"true\"}}" + GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_discussion\":{\"category\":\"audits\",\"close_older_discussions\":true,\"expires\":168,\"fallback_to_issue\":true,\"max\":1,\"title_prefix\":\"[copilot-token-optimizer] \"},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"true\"}}" with: github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} script: | diff --git a/.github/workflows/copilot-token-optimizer.md b/.github/workflows/copilot-token-optimizer.md index d871d63ca74..61f5ad6a4ef 100644 --- a/.github/workflows/copilot-token-optimizer.md +++ b/.github/workflows/copilot-token-optimizer.md @@ -1,48 +1,22 @@ --- -name: Copilot Token Optimizer -description: Analyzes the most expensive Copilot workflow identified by the token usage analyzer and creates an optimization issue with specific token-saving recommendations +description: Daily optimizer that identifies a high-token-usage Copilot workflow, audits its runs, and recommends efficiency improvements on: - workflow_run: - workflows: - - "Copilot Token Usage Analyzer" - types: - - completed - branches: - - main + schedule: + - cron: "daily around 14:00 on weekdays" workflow_dispatch: - skip-if-match: 'is:issue is:open in:title "โšก Copilot Token Optimization:"' - permissions: contents: read actions: read issues: read pull-requests: read - +tracker-id: copilot-token-optimizer engine: copilot -features: - copilot-requests: true - -strict: true - tools: + agentic-workflows: + github: + toolsets: [default] bash: - "*" - github: - toolsets: [default, issues, actions, repos] - -safe-outputs: - create-issue: - title-prefix: "โšก Copilot Token Optimization: " - labels: [automated-analysis, token-optimization, copilot, cost-reduction] - expires: 7d - max: 1 - close-older-issues: true - noop: - -network: defaults - 
-timeout-minutes: 30 - steps: - name: Install gh-aw CLI env: @@ -54,306 +28,265 @@ steps: gh extension install github/gh-aw fi gh aw --version - - name: Find and download artifacts from the most expensive Copilot workflow - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - set -euo pipefail - mkdir -p /tmp/token-optimizer - - # Use pre-fetched logs from the shared token-logs-24h pre-step - cp /tmp/gh-aw/token-logs/copilot-runs.json /tmp/token-optimizer/copilot-runs.json 2>/dev/null || echo "[]" > /tmp/token-optimizer/copilot-runs.json - - RUN_COUNT=$(jq 'length' /tmp/token-optimizer/copilot-runs.json 2>/dev/null || echo 0) - echo "Found ${RUN_COUNT} Copilot runs" - - if [ "$RUN_COUNT" -eq 0 ]; then - echo "No Copilot runs found, nothing to optimize" - exit 0 - fi - - # Find the most expensive workflow (by total tokens across all its runs) - # Schema: gh aw logs --json โ†’ LogsData.runs[] (RunData from pkg/cli/logs_report.go) - # .workflow_name (string), .token_usage (int, omitempty โ†’ null when 0), - # .estimated_cost (float, omitempty), .database_id (int64), .created_at (time), .url (string) - echo "๐Ÿ” Identifying most expensive workflow..." - jq -r ' - sort_by(.workflow_name) | - group_by(.workflow_name) | - map({ - workflow: .[0].workflow_name, - total_tokens: (map(.token_usage // 0) | add), - total_cost: (map(.estimated_cost // 0) | add), - run_count: length, - avg_tokens: ((map(.token_usage // 0) | add) / length), - run_ids: map(.database_id), - latest_run_id: (sort_by(.created_at) | last | .database_id), - latest_run_url: (sort_by(.created_at) | last | .url) - }) | - sort_by(.total_tokens) | reverse | .[0] - ' /tmp/token-optimizer/copilot-runs.json > /tmp/token-optimizer/top-workflow.json - - WORKFLOW_NAME=$(jq -r '.workflow' /tmp/token-optimizer/top-workflow.json) - LATEST_RUN_ID=$(jq -r '.latest_run_id' /tmp/token-optimizer/top-workflow.json) - echo "Most expensive workflow: $WORKFLOW_NAME (run: $LATEST_RUN_ID)" - echo "WORKFLOW_NAME=$WORKFLOW_NAME" >> "$GITHUB_ENV" - - # Download the firewall-audit-logs artifact from the latest run of that workflow - ARTIFACT_DIR="/tmp/token-optimizer/artifacts" - mkdir -p "$ARTIFACT_DIR" - - echo "๐Ÿ“ฅ Downloading firewall-audit-logs from run $LATEST_RUN_ID..." - gh run download "$LATEST_RUN_ID" \ - --repo "$GITHUB_REPOSITORY" \ - --name "firewall-audit-logs" \ - --dir "$ARTIFACT_DIR" \ - 2>/dev/null || true - - # Also download agent artifacts (contains prompt and tool usage logs) - echo "๐Ÿ“ฅ Downloading agent artifacts from run $LATEST_RUN_ID..." - gh run download "$LATEST_RUN_ID" \ - --repo "$GITHUB_REPOSITORY" \ - --name "agent" \ - --dir "$ARTIFACT_DIR/agent" \ - 2>/dev/null || true - - # Find token-usage.jsonl - USAGE_FILE=$(find "$ARTIFACT_DIR" -name "token-usage.jsonl" 2>/dev/null | head -1) - if [ -n "$USAGE_FILE" ]; then - echo "Found token-usage.jsonl: $USAGE_FILE" - cp "$USAGE_FILE" /tmp/token-optimizer/token-usage.jsonl - wc -l < /tmp/token-optimizer/token-usage.jsonl - else - echo "No token-usage.jsonl found in artifacts" - touch /tmp/token-optimizer/token-usage.jsonl - fi - - # Find the workflow markdown source - WORKFLOW_MD_NAME=$(echo "$WORKFLOW_NAME" | tr '[:upper:]' '[:lower:]' | tr ' ' '-') - WORKFLOW_MD=".github/workflows/${WORKFLOW_MD_NAME}.md" - if [ -f "$WORKFLOW_MD" ]; then - echo "Found workflow source: $WORKFLOW_MD" - cp "$WORKFLOW_MD" /tmp/token-optimizer/workflow-source.md - else - echo "Workflow source not found at $WORKFLOW_MD, searching..." 
- FOUND_MD=$(find .github/workflows -name "*.md" -exec grep -l "^name: $WORKFLOW_NAME" {} \; 2>/dev/null | head -1 || true) - if [ -n "$FOUND_MD" ]; then - echo "Found: $FOUND_MD" - cp "$FOUND_MD" /tmp/token-optimizer/workflow-source.md - fi - fi - - # Extract declared tools from workflow source (if available) - if [ -f /tmp/token-optimizer/workflow-source.md ]; then - echo "๐Ÿ“‹ Extracting declared tools from workflow source..." - # Extract tools section from frontmatter - sed -n '/^---$/,/^---$/p' /tmp/token-optimizer/workflow-source.md | \ - grep -A20 "^tools:" | head -30 > /tmp/token-optimizer/declared-tools.txt || true - cat /tmp/token-optimizer/declared-tools.txt - fi - +timeout-minutes: 30 imports: - - shared/token-logs-24h.md + - uses: shared/daily-audit-discussion.md + with: + title-prefix: "[copilot-token-optimizer] " + expires: "7d" + - uses: shared/repo-memory-standard.md + with: + branch-name: "memory/token-audit" + description: "Historical daily Copilot token usage snapshots (shared with copilot-token-audit)" + - copilot-setup-steps.yml - shared/reporting.md +features: + copilot-requests: true --- +{{#runtime-import? .github/shared-instructions.md}} -# Copilot Token Optimizer - -You are the Copilot Token Optimizer. Your job is to analyze the most token-expensive Copilot workflow from the past 24 hours and create a targeted optimization issue with specific, actionable recommendations. +# Copilot Token Usage Optimizer -## Current Context +You are the Copilot Token Optimizer โ€” an analyst that picks one high-token-usage workflow per day, deeply audits its recent runs, and produces actionable recommendations to reduce token consumption. -- **Repository**: ${{ github.repository }} -- **Analysis Date**: $(date -u +%Y-%m-%d) -- **Target Workflow**: ${{ env.WORKFLOW_NAME }} +## Mission -## Data Sources +1. Read the latest token audit snapshot from repo-memory to identify heavy-hitter workflows. +2. Pick the **single workflow** with the highest total token usage that has **not been optimized recently**. +3. Use the `agentic-workflows` MCP tools (`logs`, `audit`) to deeply inspect 5โ€“10 recent runs of that workflow. +4. Analyze firewall proxy token logs, tool usage patterns, MCP server calls, and error/warning counts. +5. Produce a conservative, evidence-based optimization discussion with specific recommendations. -All data is in `/tmp/token-optimizer/`: +## Guiding Principles -- **`copilot-runs.json`** โ€” All Copilot runs from the last 24 hours -- **`top-workflow.json`** โ€” Statistics for the most expensive workflow -- **`token-usage.jsonl`** โ€” Per-request token records from the target workflow's last run (may be empty if artifact unavailable) -- **`workflow-source.md`** โ€” The workflow's markdown source (may not exist if not found) -- **`declared-tools.txt`** โ€” Tools declared in the workflow frontmatter -- **`artifacts/agent/`** โ€” Agent artifacts (prompt, MCP logs, agent-stdio.log) from the last run +- **Be conservative**: Only recommend changes backed by evidence from multiple runs. +- **Look at many runs**: A tool that appears unused in 1 run may be critical in edge cases. Check at least 5 runs before recommending removal. +- **Quantify impact**: Estimate token savings for each recommendation. +- **Preserve correctness**: Never recommend removing a tool that is successfully used in *any* observed run. +- **Prioritize high-impact**: Focus on the biggest token savings first. 
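+
+For instance, a rough way to honor the "quantify impact" principle when proposing to drop an unused tool (an illustrative sketch only — the ~500 tokens/turn schema cost and the counts below are assumptions; replace them with the numbers actually measured in Phases 2–3):
+
+```bash
+# Back-of-the-envelope savings estimate for removing one unused tool's schema.
+SCHEMA_TOKENS=500   # approx. input tokens an unused tool description adds per request (assumption)
+AVG_TURNS=12        # average turns/run, taken from the audit data for the target workflow
+RUNS_PER_DAY=6      # daily run count, taken from the audit snapshot
+PER_RUN=$((SCHEMA_TOKENS * AVG_TURNS))
+PER_DAY=$((PER_RUN * RUNS_PER_DAY))
+echo "Estimated savings: ~${PER_RUN} tokens/run, ~${PER_DAY} tokens/day"
+```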
-## Analysis Process +## Phase 1 โ€” Select Target Workflow -### Phase 1: Load Workflow Statistics +### Step 1.1: Load Audit Snapshot -Read the top workflow data: +Read the latest audit snapshot from repo-memory: ```bash -cat /tmp/token-optimizer/top-workflow.json +# Find the most recent snapshot +LATEST=$(ls -1 /tmp/gh-aw/repo-memory/default/*.json 2>/dev/null | grep -v rolling | grep -v optimization | sort -r | head -1) +if [ -z "$LATEST" ]; then + echo "โš ๏ธ No audit snapshots found. The copilot-token-audit workflow may not have run yet." + echo "Falling back to live data collection..." +fi +echo "Latest snapshot: $LATEST" +cat "$LATEST" | jq '.workflows[:10]' ``` -Note: `avg_tokens` is the key metric. Very high `avg_tokens` means each run is expensive. +### Step 1.2: Check Optimization History -### Phase 2: Analyze Token Usage Patterns - -If `token-usage.jsonl` is available (non-empty): +Read the optimization history to avoid re-analyzing recently optimized workflows: ```bash -# Per-model breakdown -awk '{ - if (match($0, /"model" *: *"([^"]*)"/, m)) model = m[1] - else model = "unknown" - if (match($0, /"input_tokens" *: *([0-9]+)/, m)) input = m[1]+0; else input = 0 - if (match($0, /"output_tokens" *: *([0-9]+)/, m)) output = m[1]+0; else output = 0 - if (match($0, /"cache_read_tokens" *: *([0-9]+)/, m)) cr = m[1]+0; else cr = 0 - models[model] = 1 - mi[model] += input; mo[model] += output; mcr[model] += cr; mc[model] += 1 -} -END { - for (m in models) - printf "Model: %s, Input: %d, Output: %d, CacheRead: %d, Requests: %d\n", - m, mi[m], mo[m], mcr[m], mc[m] -}' /tmp/token-optimizer/token-usage.jsonl +# Check if optimization log exists +OPT_LOG="/tmp/gh-aw/repo-memory/default/optimization-log.json" +if [ -f "$OPT_LOG" ]; then + echo "Previous optimizations:" + cat "$OPT_LOG" | jq -r '.[] | "\(.date): \(.workflow_name)"' +else + echo "No previous optimization history found." +fi ``` -Look for: -- **High input tokens per request** โ†’ large context window being used -- **Low cache hit rate** โ†’ context is not being reused across turns -- **High request count** โ†’ many back-and-forth turns; consider `max-turns` limit +### Step 1.3: Select Target -### Phase 3: Analyze Declared vs. Used Tools +Pick the workflow with the highest `total_tokens` from the audit snapshot that does **not** appear in the optimization log within the last 14 days. If all top workflows have been recently optimized, pick the one that was optimized longest ago. -Review the workflow source to understand which tools are declared: +If no audit snapshot exists, use the `agentic-workflows` MCP `logs` tool to query recent Copilot runs and select the heaviest consumer. -```bash -cat /tmp/token-optimizer/workflow-source.md -``` +## Phase 2 โ€” Deep Audit + +### Step 2.1: Fetch Recent Runs via MCP -Then check agent logs for which tools were actually called: +Use the `agentic-workflows` MCP `logs` tool to fetch the last 7 days of runs for the target workflow. This returns structured data including token usage, tool calls, and run metadata. 
+ +Then use `gh aw logs` to download runs with firewall data for deeper analysis: ```bash -# Find MCP tool calls in agent logs -find /tmp/token-optimizer/artifacts/agent -name "*.log" 2>/dev/null | xargs grep -l "mcp__" 2>/dev/null | head -3 -grep -oh 'mcp__[a-z_]*__[a-z_]*' /tmp/token-optimizer/artifacts/agent/*.log 2>/dev/null | sort | uniq -c | sort -rn | head -30 +# Download last 7 days of runs for the selected workflow, with firewall data +gh aw logs \ + --engine copilot \ + --start-date -7d \ + --json \ + --firewall \ + -c 20 \ + > /tmp/gh-aw/token-audit/target-runs.json + +# Show summary +jq '{ + workflow: .runs[0].workflow_name, + total_runs: (.runs | length), + total_tokens: [.runs[].token_usage // 0] | add, + avg_tokens: ([.runs[].token_usage // 0] | add) / ([.runs[].token_usage // 0] | length), + tool_usage: .tool_usage +}' /tmp/gh-aw/token-audit/target-runs.json ``` -Compare: -- **Declared tools** (from frontmatter) vs **tools actually invoked** (from agent logs) -- Tools declared but never called are injected into the context window every turn, wasting tokens -- Each unused tool description typically costs ~500 tokens/turn +### Step 2.2: Audit Individual Runs -### Phase 4: Identify Specific Optimization Opportunities +Use the `agentic-workflows` MCP `audit` tool to get detailed data on 3โ€“5 representative runs (mix of high-token and typical-token runs). -Based on the data, identify opportunities from these categories: +For each audited run, extract: +- **Token usage breakdown** by model (`token_usage_summary.by_model`) +- **Tool usage**: which MCP tools were called, how many times, and whether they succeeded +- **Missing tools**: tools the agent tried to use but were not available +- **MCP failures**: MCP server errors or timeouts +- **Error and warning counts** +- **Turns**: total conversation turns +- **Firewall analysis**: blocked requests, allowed domains +- **Cache efficiency**: `token_usage_summary.cache_efficiency` -#### A. Unused Tool Exclusions -Tools declared in frontmatter but never invoked during the run. Removing these saves tokens every turn. +### Step 2.3: Read the Workflow Source -Example finding: "GitHub `toolsets: [default, actions, repos]` declared but only `issues` MCP tools called โ€” exclude `actions` and `repos` toolsets to save ~500 tokens/turn" +Use the GitHub MCP tools to read the target workflow's `.md` file from the repository. This lets you see: +- Which MCP tools are configured +- Network permissions +- Prompt instructions +- Imported shared components -#### B. Context Window Reduction -If input tokens per request are very high (> 50k), the context window may be bloated by: -- Large file reads (suggest chunking or streaming) -- Verbose MCP responses (suggest pagination with smaller `perPage`) -- Long conversation history (suggest `max-turns` reduction) +## Phase 3 โ€” Analysis -#### C. Turn Reduction -If request count per run is high (> 10 turns), consider: -- More specific prompt instructions to reduce back-and-forth -- Pre-computing data in `steps:` before the agent runs -- Using `strict: true` to fail fast on unexpected tool calls +### 3.1: Tool Usage Analysis -#### D. Prompt Optimization -- System prompt restructuring for better cache hit rate -- Removing verbose instructions that are rarely needed -- Using shared imports for common instructions instead of duplicating text +Cross-reference **configured tools** (from the workflow `.md`) with **actual tool usage** (from audit data): -### Phase 5: Create Optimization Issue +| Tool | Configured? 
| Used in N/M runs | Avg calls/run | Recommendation | +|---|---|---|---|---| +| ... | ... | ... | ... | Keep / Consider removing / Remove | -Create an issue with the title: `[workflow-name] (avg [N]k tokens/run)` โ€” the prefix `โšก Copilot Token Optimization:` is automatically added. +**Rules for tool recommendations:** +- **Keep**: Used in โ‰ฅ50% of audited runs, or used in any run and essential to the workflow's purpose +- **Consider removing**: Used in <20% of runs AND not part of the workflow's core purpose +- **Remove**: Never used across all audited runs AND not referenced in the prompt -#### Issue Body Structure +### 3.2: Token Efficiency Analysis -```markdown -### Target Workflow: [workflow-name] +- Compare `token_usage` vs `effective_tokens` โ€” a large gap suggests poor cache utilization +- Check `cache_efficiency` โ€” below 0.3 suggests the workflow isn't benefiting from caching +- Look at `turns` โ€” high turn counts relative to task complexity suggest the prompt could be clearer +- Check input vs output token ratio from `token_usage_summary.by_model` -**Why this workflow?** Highest total token consumption in the last 24 hours. +### 3.3: Error Pattern Analysis -| Metric | Value | -|--------|-------| -| Runs (24h) | [N] | -| Avg tokens/run | [N]k | -| Total est. cost (24h) | $[X] | -| Avg turns/run | [N] | +- Recurring errors or warnings that cause retries waste tokens +- MCP failures that trigger fallback behavior +- Missing tools that cause the agent to improvise (expensive) + +### 3.4: Prompt Efficiency + +- Is the prompt overly verbose? Long prompts consume input tokens on every turn +- Are there redundant instructions? +- Could few-shot examples be replaced with clearer constraints? + +## Phase 4 โ€” Recommendations -### Token Usage Breakdown +Generate specific, actionable recommendations with estimated token savings: -_(from token-usage.jsonl โ€” if available)_ +### Recommendation Categories -| Model | Input | Output | Cache Read | Cache Hit % | -|-------|-------|--------|------------|-------------| -| [model] | [n] | [n] | [n] | [pct]% | +1. **Tool Configuration** (high impact) + - Remove unused MCP tools (each tool's schema consumes input tokens) + - Consolidate overlapping tools + - Add missing tools that would prevent expensive workarounds -### Optimization Recommendations +2. **Prompt Optimization** (medium impact) + - Reduce prompt length where possible + - Clarify ambiguous instructions that cause extra turns + - Add constraints that prevent unnecessary exploration -#### 1. [Highest Impact Recommendation] +3. **Configuration Tuning** (medium impact) + - Adjust `timeout-minutes` if runs consistently finish early or time out + - Review `max-continuations` settings + - Consider `strict: true` if not already set -**Potential savings**: ~[N]k tokens/run (~$[X]/run ร— [N] runs/day = ~$[X]/day) +4. **Architecture Changes** (high impact, higher risk) + - Split large prompts into focused sub-workflows + - Use shared components to reduce duplication + - Pre-compute data in bash steps to reduce agent work -**Current state**: [What the workflow currently does that's expensive] +## Phase 5 โ€” Publish Discussion + +Create a discussion with the analysis. 
Use this structure: -**Recommended change**: -```diff -- [current config/prompt line] -+ [optimized config/prompt line] ``` +### ๐Ÿ” Optimization Target: [Workflow Name] -**Why this helps**: [Explanation] +**Selected because**: Highest token consumer not recently optimized +**Analysis period**: [date range] +**Runs analyzed**: N runs (M audited in detail) -#### 2. [Second Recommendation] +### ๐Ÿ“Š Token Usage Profile -... +| Metric | Value | +|---|---| +| Total tokens (7d) | N | +| Avg tokens/run | N | +| Total cost (7d) | $X.XX | +| Avg turns/run | N | +| Cache efficiency | X% | -
-Tool Usage Analysis +### ๐Ÿ”ง Recommendations -**Declared tools** (from frontmatter): -[list from declared-tools.txt] +#### 1. [Recommendation title] โ€” Est. savings: ~N tokens/run -**Tools actually invoked** (from agent logs): -[list from mcp call analysis] +[Evidence and rationale from multiple runs] -**Unused tools** (candidates for removal): -- `[toolset/tool]` โ€” never called, saves ~[N] tokens/turn if removed +**Action**: [Specific change to make] -
+#### 2. [Next recommendation] +...
-Raw Metrics +Tool Usage Matrix -[token-usage.jsonl summary] +[Full tool usage table]
-### Implementation Checklist +
+Audited Runs Detail + +[Per-run audit summaries with links] -- [ ] Apply recommended changes to `.github/workflows/[workflow-name].md` -- [ ] Run `make recompile` to regenerate the lock file -- [ ] Trigger a manual run via `workflow_dispatch` to verify -- [ ] Compare token usage in next analyzer report +
-### References +### โš ๏ธ Caveats -- [Last run of [workflow-name]](LATEST_RUN_URL) -- Analysis triggered by: [ยงRUN_ID](RUN_URL) +- These recommendations are based on N runs over M days +- Edge cases not observed in the sample may require some tools +- Verify changes in a test run before applying permanently ``` -## Important Guidelines +## Phase 6 โ€” Update Optimization Log -- **Be specific**: Name exact tools, exact token counts, exact cost estimates. -- **Prioritize by impact**: List the highest token-saving opportunity first. -- **Be conservative**: Only recommend removing tools you're confident are unused (verify from logs). -- **If no data**: If both `token-usage.jsonl` and agent logs are unavailable, base recommendations on workflow source analysis only, and note the limitation. -- **`noop` when appropriate**: If the workflow is already well-optimized (< 10k tokens/run average) or if you cannot find meaningful optimization opportunities, call `noop` instead of creating a low-value issue. - -**Important**: You MUST call a safe-output tool (`create-issue` or `noop`) at the end of your analysis. Failing to call any safe-output tool is the most common cause of workflow failures. +Append an entry to `/tmp/gh-aw/repo-memory/default/optimization-log.json`: ```json -{"noop": {"message": "No action needed: [brief explanation]"}} +{ + "date": "YYYY-MM-DD", + "workflow_name": "...", + "total_tokens_analyzed": N, + "runs_audited": N, + "recommendations_count": N, + "estimated_savings_per_run": N +} ``` + +Load the existing array, append the new entry, trim to the last 30 entries, and save. + +## Important Notes + +- The `agentic-workflows` MCP tools (`logs`, `audit`) are your primary interface for querying run data beyond the pre-downloaded snapshot. +- Use `gh aw logs` and `gh aw audit` CLI commands in bash steps for bulk data downloads with firewall details. +- Treat null/missing `token_usage` and `estimated_cost` as 0. +- The repo-memory branch `memory/token-audit` is shared with the `copilot-token-audit` workflow โ€” read its snapshots but don't overwrite them. Only write to `optimization-log.json`. +- If the audit snapshot is stale (>3 days old), fall back to the `agentic-workflows` MCP `logs` tool for fresh data. 
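+
+As a minimal sketch of the Phase 6 append-and-trim step (assuming `jq` is available in the agent environment and that `$WORKFLOW_NAME`, `$TOTAL_TOKENS_ANALYZED`, and `$RUNS_AUDITED` are placeholder shell variables you computed earlier — they are not provided by the workflow itself):
+
+```bash
+# Append today's entry to the optimization log and keep only the most recent 30 entries.
+OPT_LOG="/tmp/gh-aw/repo-memory/default/optimization-log.json"
+[ -f "$OPT_LOG" ] || echo '[]' > "$OPT_LOG"
+ENTRY=$(jq -n \
+  --arg date "$(date -u +%Y-%m-%d)" \
+  --arg wf "$WORKFLOW_NAME" \
+  --argjson tokens "${TOTAL_TOKENS_ANALYZED:-0}" \
+  --argjson runs "${RUNS_AUDITED:-0}" \
+  '{date: $date, workflow_name: $wf, total_tokens_analyzed: $tokens, runs_audited: $runs}')
+jq --argjson entry "$ENTRY" '. + [$entry] | .[-30:]' "$OPT_LOG" > "${OPT_LOG}.tmp" && mv "${OPT_LOG}.tmp" "$OPT_LOG"
+```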
diff --git a/.github/workflows/copilot-token-usage-analyzer.md b/.github/workflows/copilot-token-usage-analyzer.md deleted file mode 100644 index eb38b01fe94..00000000000 --- a/.github/workflows/copilot-token-usage-analyzer.md +++ /dev/null @@ -1,409 +0,0 @@ ---- -name: Copilot Token Usage Analyzer -description: Daily analysis of Copilot token consumption across all agentic workflows, creating a usage report issue with per-workflow statistics and optimization opportunities -on: - schedule: - - cron: "daily around 09:00 on weekdays" - workflow_dispatch: - -permissions: - contents: read - actions: read - issues: read - pull-requests: read - -engine: copilot -features: - copilot-requests: true - -tools: - bash: - - "*" - github: - toolsets: [default, issues, actions] - -safe-outputs: - create-issue: - title-prefix: "๐Ÿ“Š Copilot Token Usage Report: " - labels: [automated-analysis, token-usage, copilot] - expires: 2d - max: 1 - close-older-issues: true - upload-asset: - noop: - -network: defaults - -timeout-minutes: 30 - -steps: - - name: Install gh-aw CLI - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - if gh extension list | grep -q "github/gh-aw"; then - gh extension upgrade gh-aw || true - else - gh extension install github/gh-aw - fi - gh aw --version - - name: Download Copilot workflow runs (last 24h) - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - set -euo pipefail - mkdir -p /tmp/token-analyzer - - # Use pre-fetched logs from the shared token-logs-24h pre-step - cp /tmp/gh-aw/token-logs/copilot-runs.json /tmp/token-analyzer/copilot-runs.json 2>/dev/null || echo "[]" > /tmp/token-analyzer/copilot-runs.json - - RUN_COUNT=$(jq 'length' /tmp/token-analyzer/copilot-runs.json 2>/dev/null || echo 0) - echo "โœ… Found ${RUN_COUNT} Copilot workflow runs" - - # Download token-usage.jsonl artifacts for per-model breakdown - # We look for the firewall-audit-logs artifact which contains token-usage.jsonl - ARTIFACT_DIR="/tmp/token-analyzer/artifacts" - mkdir -p "$ARTIFACT_DIR" - - echo "๐Ÿ“ฅ Downloading token-usage.jsonl artifacts..." - jq -r '.[0:50][]?.database_id' /tmp/token-analyzer/copilot-runs.json 2>/dev/null > /tmp/token-analyzer/run-ids.txt || true - while read -r run_id; do - run_dir="$ARTIFACT_DIR/$run_id" - mkdir -p "$run_dir" - gh run download "$run_id" \ - --repo "$GITHUB_REPOSITORY" \ - --name "firewall-audit-logs" \ - --dir "$run_dir" \ - 2>/dev/null || true - done < /tmp/token-analyzer/run-ids.txt - - # Count how many token-usage.jsonl files we got - JSONL_COUNT=$(find "$ARTIFACT_DIR" -name "token-usage.jsonl" 2>/dev/null | wc -l) - echo "โœ… Downloaded ${JSONL_COUNT} token-usage.jsonl artifacts" - - # Merge all token-usage.jsonl files into a single aggregate file annotated with run_id - MERGED_FILE="/tmp/token-analyzer/token-usage-merged.jsonl" - > "$MERGED_FILE" - find "$ARTIFACT_DIR" -name "token-usage.jsonl" > /tmp/token-analyzer/jsonl-files.txt 2>/dev/null || true - while read -r f; do - run_id=$(echo "$f" | grep -oP '(?<=/artifacts/)\d+(?=/)' || true) - while IFS= read -r line; do - if [ -n "$line" ]; then - echo "${line}" | jq --arg run_id "$run_id" '. 
+ {run_id: $run_id}' >> "$MERGED_FILE" 2>/dev/null || true - fi - done < "$f" - done < /tmp/token-analyzer/jsonl-files.txt - - RECORD_COUNT=$(wc -l < "$MERGED_FILE" 2>/dev/null || echo 0) - echo "โœ… Merged ${RECORD_COUNT} token usage records" - -imports: - - shared/token-logs-24h.md - - shared/reporting.md - - shared/charts-with-trending.md ---- - -# Copilot Token Usage Analyzer - -You are the Copilot Token Usage Analyzer. Your job is to analyze Copilot token consumption across all agentic workflows that ran in the past 24 hours and create a concise, actionable report issue. - -## Current Context - -- **Repository**: ${{ github.repository }} -- **Analysis Date**: $(date -u +%Y-%m-%d) -- **Engine Filter**: Copilot only -- **Window**: Last 24 hours - -## Data Sources - -Pre-downloaded data is available in `/tmp/token-analyzer/`: - -- **`/tmp/token-analyzer/copilot-runs.json`** โ€” All Copilot workflow runs from the last 24 hours (array of run objects with `workflow_name`, `database_id`, `token_usage`, `turns`, `url`, `conclusion`, etc.) -- **`/tmp/token-analyzer/token-usage-merged.jsonl`** โ€” Merged per-request token records from `firewall-audit-logs` artifacts, with fields: `model`, `provider`, `input_tokens`, `output_tokens`, `cache_read_tokens`, `cache_write_tokens`, `duration_ms`, `run_id` - -## Analysis Process - -### Phase 1: Parse Workflow Run Data - -Process `/tmp/token-analyzer/copilot-runs.json` to compute per-workflow statistics: - -```bash -jq -r '.[] | [.workflow_name, .token_usage, .turns, .conclusion, .url, .database_id] | @tsv' \ - /tmp/token-analyzer/copilot-runs.json -``` - -Compute for each workflow: -- **Total runs** and **successful runs** (conclusion == "success") -- **Total tokens** and **average tokens per run** -- **Total estimated cost** and **average cost per run** -- **Average turns per run** -- **Run IDs** for the most expensive runs (for artifact links) - -### Phase 1.5: Save Today's Data to Cache-Memory - -After computing per-workflow statistics, persist today's aggregated data for trending. Use the `bash` tool: - -```bash -mkdir -p /tmp/gh-aw/cache-memory/trending/token-usage -TODAY=$(date -u +%Y-%m-%d) # Always use UTC date for consistency with the Python charts - -# Append daily aggregated totals (one JSON object per line) -cat >> /tmp/gh-aw/cache-memory/trending/token-usage/history.jsonl << EOF -{"date":"${TODAY}","total_tokens":TOTAL_TOKENS,"total_runs":TOTAL_RUNS,"total_cost":TOTAL_COST,"total_turns":TOTAL_TURNS} -EOF - -# Append per-workflow breakdown for heatmap (one entry per workflow โ€” repeat for each workflow): -cat >> /tmp/gh-aw/cache-memory/trending/token-usage/workflows.jsonl << EOF -{"date":"${TODAY}","workflow":"WORKFLOW_NAME","tokens":TOKENS,"runs":RUNS,"cost":COST} -EOF -``` - -Replace the placeholder values (TOTAL_TOKENS, TOTAL_RUNS, etc.) with the actual computed numbers. **Only append entries for workflows that actually ran today** โ€” do not append zero-entries for missing days, as the Python charts gracefully skip charts when data is insufficient. - -### Phase 2: Parse Token-Level Data (if available) - -Process `/tmp/token-analyzer/token-usage-merged.jsonl` for per-model breakdown: - -```bash -# Aggregate by model -jq -r '[.model, .input_tokens, .output_tokens, .cache_read_tokens, .cache_write_tokens] | @tsv' \ - /tmp/token-analyzer/token-usage-merged.jsonl 2>/dev/null | awk '...' 
-``` - -Compute for each model: -- **Total input tokens** (billed at full rate) -- **Total output tokens** (billed at full rate) -- **Total cache read tokens** (billed at reduced rate ~10%) -- **Cache hit rate**: `cache_read / (input + cache_read)` ร— 100% -- **Billable token equivalent**: approximate total considering cache discounts - -### Phase 3: Identify Top Workflows and Anomalies - -From the per-workflow statistics, identify: -1. **Top 5 most expensive workflows** by total estimated cost -2. **Highest token-per-turn ratio** (potential for optimization) -3. **Lowest cache hit rate** (may benefit from prompt restructuring) -4. **Highest run volume** (most frequent consumers) - -### Phase 3.5: Generate Trending Charts - -Generate Python charts to embed in the report issue. Use the Python environment provided by `shared/charts-with-trending.md`. - -```bash -mkdir -p /tmp/gh-aw/python/{data,charts} -``` - -Write the following Python script to `/tmp/gh-aw/python/token_charts.py` and execute it: - -```python -#!/usr/bin/env python3 -""" -Copilot token usage trending charts -Generates: top-consumers bar, daily trend line, workflow heatmap -""" -import json, os -import pandas as pd -import matplotlib -matplotlib.use('Agg') -import matplotlib.pyplot as plt -import seaborn as sns -from datetime import datetime - -CACHE_DIR = '/tmp/gh-aw/cache-memory/trending/token-usage' -CHARTS_DIR = '/tmp/gh-aw/python/charts' -os.makedirs(CHARTS_DIR, exist_ok=True) - -sns.set_style('whitegrid') - -# --- Chart 1: Top-10 Consumers (always) --- -# Build from today's per-workflow data already in workflows.jsonl -wf_file = os.path.join(CACHE_DIR, 'workflows.jsonl') -today = datetime.utcnow().strftime('%Y-%m-%d') -wf_rows = [] -if os.path.exists(wf_file): - with open(wf_file) as f: - for line in f: - line = line.strip() - if line: - obj = json.loads(line) - if obj.get('date') == today: - wf_rows.append(obj) - -if wf_rows: - df_wf = pd.DataFrame(wf_rows) - df_top = df_wf.groupby('workflow')['tokens'].sum().nlargest(10).sort_values() - fig, ax = plt.subplots(figsize=(12, 7), dpi=150) - colors = sns.color_palette('YlOrRd', len(df_top)) - ax.barh(df_top.index, df_top.values, color=colors) - ax.set_xlabel('Total Tokens', fontsize=12) - ax.set_title(f'๐Ÿ”ฅ Top-10 Copilot Token Consumers โ€” {today}', fontsize=14, fontweight='bold') - for i, v in enumerate(df_top.values): - ax.text(v * 1.005, i, f'{v:,.0f}', va='center', fontsize=9) - plt.tight_layout() - plt.savefig(f'{CHARTS_DIR}/top_consumers.png', dpi=150, bbox_inches='tight', facecolor='white') - plt.close() - print(f'โœ… top_consumers.png saved ({len(df_wf)} workflows)') -else: - print('โš ๏ธ No workflow data for today โ€” skipping top_consumers chart') - -# --- Chart 2: Daily trend line (>=2 data points) --- -hist_file = os.path.join(CACHE_DIR, 'history.jsonl') -hist_rows = [] -if os.path.exists(hist_file): - with open(hist_file) as f: - for line in f: - line = line.strip() - if line: - hist_rows.append(json.loads(line)) - -if len(hist_rows) >= 2: - df_hist = pd.DataFrame(hist_rows) - df_hist['date'] = pd.to_datetime(df_hist['date']) - df_hist = df_hist.sort_values('date').drop_duplicates('date') - - fig, ax1 = plt.subplots(figsize=(12, 6), dpi=150) - color_tok = '#d62728' - color_run = '#1f77b4' - ax1.set_xlabel('Date', fontsize=11) - ax1.set_ylabel('Total Tokens', color=color_tok, fontsize=11) - ax1.plot(df_hist['date'], df_hist['total_tokens'], color=color_tok, - marker='o', linewidth=2, label='Total Tokens') - ax1.tick_params(axis='y', labelcolor=color_tok) - 
-    ax2 = ax1.twinx()
-    ax2.set_ylabel('Total Runs', color=color_run, fontsize=11)
-    ax2.plot(df_hist['date'], df_hist['total_runs'], color=color_run,
-             marker='s', linewidth=2, linestyle='--', label='Total Runs')
-    ax2.tick_params(axis='y', labelcolor=color_run)
-
-    lines1, labels1 = ax1.get_legend_handles_labels()
-    lines2, labels2 = ax2.get_legend_handles_labels()
-    ax1.legend(lines1 + lines2, labels1 + labels2, loc='upper left')
-    fig.suptitle('📈 Copilot Token Usage - Daily Trend', fontsize=14, fontweight='bold')
-    plt.xticks(rotation=30)
-    plt.tight_layout()
-    plt.savefig(f'{CHARTS_DIR}/daily_trend.png', dpi=150, bbox_inches='tight', facecolor='white')
-    plt.close()
-    print(f'✅ daily_trend.png saved ({len(df_hist)} data points)')
-else:
-    print(f'ℹ️ Only {len(hist_rows)} history point(s) - daily_trend requires ≥2')
-
-# --- Chart 3: Workflow heatmap (>=3 data points) ---
-if os.path.exists(wf_file) and len(hist_rows) >= 3:
-    all_wf = []
-    with open(wf_file) as f:
-        for line in f:
-            line = line.strip()
-            if line:
-                all_wf.append(json.loads(line))
-    if all_wf:
-        df_all = pd.DataFrame(all_wf)
-        df_all['date'] = pd.to_datetime(df_all['date'])
-        top8 = df_all.groupby('workflow')['tokens'].sum().nlargest(8).index.tolist()
-        df_heat = df_all[df_all['workflow'].isin(top8)].copy()
-        recent_dates = sorted(df_heat['date'].unique())[-14:]  # last 14 days
-        df_heat = df_heat[df_heat['date'].isin(recent_dates)]
-        pivot = df_heat.pivot_table(index='workflow', columns='date',
-                                    values='tokens', aggfunc='sum', fill_value=0)
-        pivot.columns = [d.strftime('%m/%d') for d in pivot.columns]
-        fig, ax = plt.subplots(figsize=(max(10, len(pivot.columns) * 0.9), 6), dpi=150)
-        sns.heatmap(pivot, cmap='YlOrRd', annot=True, fmt='.0f',
-                    linewidths=0.5, ax=ax, cbar_kws={'label': 'Tokens'})
-        ax.set_title('🗓️ Workflow Token Heatmap - Top-8 Workflows', fontsize=14, fontweight='bold')
-        ax.set_xlabel('Date', fontsize=11)
-        ax.set_ylabel('Workflow', fontsize=11)
-        plt.tight_layout()
-        plt.savefig(f'{CHARTS_DIR}/workflow_heatmap.png', dpi=150, bbox_inches='tight', facecolor='white')
-        plt.close()
-        print(f'✅ workflow_heatmap.png saved ({len(pivot)} workflows × {len(pivot.columns)} dates)')
-    else:
-        print('ℹ️ No multi-day workflow data yet - heatmap requires ≥3 history points')
-else:
-    print(f'ℹ️ Heatmap skipped - needs workflows.jsonl and ≥3 history points (have {len(hist_rows)})')
-```
-
-Run the script:
-```bash
-python3 /tmp/gh-aw/python/token_charts.py
-```
-
-After the script succeeds, upload each generated chart with the `upload_asset` safe-output tool. **Check that a file exists before uploading it**:
-- If `/tmp/gh-aw/python/charts/top_consumers.png` exists: upload it → save the URL as `TOP_CONSUMERS_URL`
-- If `/tmp/gh-aw/python/charts/daily_trend.png` exists: upload it → save the URL as `DAILY_TREND_URL`
-- If `/tmp/gh-aw/python/charts/workflow_heatmap.png` exists: upload it → save the URL as `HEATMAP_URL`
-
-Skip the upload call entirely for any chart that was not generated.
-
-### Phase 4: Create Report Issue
-
-Create an issue with the title format `YYYY-MM-DD` (date only; the prefix `📊 Copilot Token Usage Report:` is added automatically).
-
-#### Issue Body Structure
-
-```markdown
-### Summary
-
-Analyzed **[N]** Copilot workflow runs from **[DATE]** covering **[M]** unique workflows.
-Total: **[TOTAL_TOKENS]** tokens (~**$[TOTAL_COST]**) across **[TOTAL_TURNS]** turns.
-
-### 📊 Token Usage Charts
-
-#### 🔥 Top Consumers
-![Top Consumers](TOP_CONSUMERS_URL)
-
-#### 📈 Daily Trend
-_(Include this section only when DAILY_TREND_URL is available - requires ≥ 2 historical data points)_
-![Daily Token Trend](DAILY_TREND_URL)
-
-#### 🗓️ Workflow Heatmap
-_(Include this section only when HEATMAP_URL is available - requires ≥ 3 historical data points)_
-![Workflow Heatmap](HEATMAP_URL)
-
-### Top Workflows by Cost
-
-| Workflow | Runs | Total Tokens | Avg Tokens/Run | Est. Cost | Avg Turns |
-|----------|------|--------------|----------------|-----------|-----------|
-| [name] | [n] | [tokens] | [avg] | $[cost] | [turns] |
-| ... | | | | | |
-
-### Token Breakdown by Model
-
-| Model | Input Tokens | Output Tokens | Cache Read | Cache Hit % | Requests |
-|-------|--------------|---------------|------------|-------------|----------|
-| [model] | [n] | [n] | [n] | [pct]% | [n] |
-
-_(Only shown when token-usage.jsonl artifacts are available)_
-
-<details>
-<summary>All Workflows (Full Statistics)</summary>
-
-| Workflow | Runs | Success Rate | Total Tokens | Total Cost | Avg Turns | Avg Cost/Run |
-|----------|------|--------------|--------------|------------|-----------|--------------|
-| [name] | [n] | [pct]% | [tokens] | $[cost] | [turns] | $[avg] |
-| ... | | | | | | |
-
-</details>
-
-### Optimization Opportunities
-
-1. **[Workflow]** - [specific observation, e.g., "avg 45k tokens/run with a 0% cache hit rate - consider restructuring the prompt for better caching"]
-2. **[Workflow]** - [observation]
-
-### References
-
-- Triggered by: [§RUN_ID](RUN_URL)
-```
-
-## Important Guidelines
-
-- **If no runs found**: Call `noop` with a message explaining that there were no Copilot runs in the last 24 hours.
-- **Be precise**: Use exact numbers from the data, not estimates.
-- **Link runs**: Format run IDs as `[§ID](URL)` for easy navigation.
-- **One issue only**: The `max: 1` configuration ensures only one issue is created; older issues are auto-closed.
-- **Use `noop` if needed**: If you cannot create a meaningful report (no data, parse errors), call `noop` with an explanation.
-
-**Important**: You MUST call a safe-output tool (`create_issue` or `noop`) at the end of your analysis. Failing to call any safe-output tool is the most common cause of workflow failures.
-
-```json
-{"noop": {"message": "No action needed: [brief explanation]"}}
-```
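-
-For symmetry, a minimal sketch of the corresponding `create_issue` call, mirroring the shape of the `noop` example above; the `title` and `body` field names are illustrative assumptions, not taken from this workflow's configuration:
-
-```json
-{"create_issue": {"title": "YYYY-MM-DD", "body": "[issue body assembled in Phase 4]"}}
-```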