From d265240b9a514735a8f9fc9f59b634719e3ea2b9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 5 Nov 2025 22:40:11 +0000 Subject: [PATCH 1/2] Initial plan From 1957b61ee101d44db5b1b399869bf6cf195766d5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 5 Nov 2025 22:49:44 +0000 Subject: [PATCH 2/2] Add workflow optimizations based on live log analysis - Create shared/mcp-pagination.md with MCP response size best practices - Update smoke-claude.md with pagination imports, toolsets config, and max-turns limit - Enhance cli-version-checker.md with JSON parsing guidance for npm commands Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .../workflows/cli-version-checker.lock.yml | 49 +++++++ .github/workflows/cli-version-checker.md | 49 +++++++ .github/workflows/shared/mcp-pagination.md | 110 ++++++++++++++++ .github/workflows/smoke-claude.lock.yml | 123 +++++++++++++++++- .github/workflows/smoke-claude.md | 7 +- 5 files changed, 334 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/shared/mcp-pagination.md diff --git a/.github/workflows/cli-version-checker.lock.yml b/.github/workflows/cli-version-checker.lock.yml index ca339e2f09..8338b5bbc4 100644 --- a/.github/workflows/cli-version-checker.lock.yml +++ b/.github/workflows/cli-version-checker.lock.yml @@ -1327,6 +1327,55 @@ jobs: - Test with `make recompile` before creating PR - **DO NOT COMMIT** `*.lock.yml` or `pkg/workflow/js/*.js` files directly + ## Common JSON Parsing Issues + + When using npm commands or other CLI tools, their output may include informational messages with Unicode symbols that break JSON parsing: + + **Problem Patterns**: + - `Unexpected token 'ℹ', "ℹ Timeout "... is not valid JSON` + - `Unexpected token '⚠', "⚠ pip pack"... is not valid JSON` + - `Unexpected token '✓', "✓ Success"... is not valid JSON` + + **Solutions**: + + ### 1. 
Filter stderr (Recommended) + Redirect stderr to suppress npm warnings/info: + ```bash + npm view @github/copilot version 2>/dev/null + npm view @anthropic-ai/claude-code --json 2>/dev/null + ``` + + ### 2. Use grep to filter output + Remove lines with Unicode symbols before parsing: + ```bash + npm view @github/copilot --json | grep -v "^[ℹ⚠✓]" + ``` + + ### 3. Use jq for reliable extraction + Discard stderr first, then let jq extract the field (jq fails on non-JSON input, so it must receive clean JSON): + ```bash + # Extract version field only; stderr is dropped so jq sees only JSON + npm view @github/copilot --json 2>/dev/null | jq -r '.version' + ``` + + ### 4. Check tool output before parsing + Always validate JSON before attempting to parse: + ```bash + output=$(npm view package --json 2>/dev/null) + if echo "$output" | jq empty 2>/dev/null; then + # Valid JSON, safe to parse + version=$(echo "$output" | jq -r '.version') + else + # Invalid JSON, handle error + echo "Warning: npm output is not valid JSON" + fi + ``` + + **Best Practice**: Combine stderr filtering with jq extraction for the most reliable results: + ```bash + npm view @github/copilot --json 2>/dev/null | jq -r '.version' + ``` + ## Error Handling - **SAVE PROGRESS**: Before exiting on errors, save current state to cache-memory - **RESUME ON RESTART**: Check cache-memory on startup to resume from where you left off diff --git a/.github/workflows/cli-version-checker.md b/.github/workflows/cli-version-checker.md index c1149d668c..64fd40e885 100644 --- a/.github/workflows/cli-version-checker.md +++ b/.github/workflows/cli-version-checker.md @@ -125,6 +125,55 @@ Template structure: - Test with `make recompile` before creating PR - **DO NOT COMMIT** `*.lock.yml` or `pkg/workflow/js/*.js` files directly +## Common JSON Parsing Issues + +When using npm commands or other CLI tools, their output may include informational messages with Unicode symbols that break JSON parsing: + +**Problem Patterns**: +- `Unexpected token 'ℹ', "ℹ Timeout "... is not valid JSON` +- `Unexpected token '⚠', "⚠ pip pack"... 
is not valid JSON` +- `Unexpected token '✓', "✓ Success"... is not valid JSON` + +**Solutions**: + +### 1. Filter stderr (Recommended) +Redirect stderr to suppress npm warnings/info: +```bash +npm view @github/copilot version 2>/dev/null +npm view @anthropic-ai/claude-code --json 2>/dev/null +``` + +### 2. Use grep to filter output +Remove lines with Unicode symbols before parsing: +```bash +npm view @github/copilot --json | grep -v "^[ℹ⚠✓]" +``` + +### 3. Use jq for reliable extraction +Discard stderr first, then let jq extract the field (jq fails on non-JSON input, so it must receive clean JSON): +```bash +# Extract version field only; stderr is dropped so jq sees only JSON +npm view @github/copilot --json 2>/dev/null | jq -r '.version' +``` + +### 4. Check tool output before parsing +Always validate JSON before attempting to parse: +```bash +output=$(npm view package --json 2>/dev/null) +if echo "$output" | jq empty 2>/dev/null; then + # Valid JSON, safe to parse + version=$(echo "$output" | jq -r '.version') +else + # Invalid JSON, handle error + echo "Warning: npm output is not valid JSON" +fi +``` + +**Best Practice**: Combine stderr filtering with jq extraction for the most reliable results: +```bash +npm view @github/copilot --json 2>/dev/null | jq -r '.version' +``` + ## Error Handling - **SAVE PROGRESS**: Before exiting on errors, save current state to cache-memory - **RESUME ON RESTART**: Check cache-memory on startup to resume from where you left off diff --git a/.github/workflows/shared/mcp-pagination.md b/.github/workflows/shared/mcp-pagination.md new file mode 100644 index 0000000000..f3a9928f5b --- /dev/null +++ b/.github/workflows/shared/mcp-pagination.md @@ -0,0 +1,110 @@ +## MCP Response Size Limits + +MCP tool responses have a **25,000 token limit**. When GitHub API responses exceed this limit, workflows must retry with pagination parameters, wasting turns and tokens. 
+ +### Common Scenarios + +**Problem**: Fetching large result sets without pagination +- `list_pull_requests` with many PRs (75,897 tokens in one case) +- `pull_request_read` with large diff/comments (31,675 tokens observed) +- `search_issues`, `search_code` with many results + +**Solution**: Use proactive pagination to stay under token limits + +### Pagination Best Practices + +#### 1. Use `perPage` Parameter + +Limit results per request to prevent oversized responses: + +```bash +# Good: Fetch PRs in small batches +list_pull_requests --perPage 10 + +# Good: Get issue with limited comments +issue_read --method get_comments --perPage 20 + +# Bad: Default pagination may return too much data +list_pull_requests # May exceed 25k tokens +``` + +#### 2. Common `perPage` Values + +- **10-20**: For detailed items (PRs with diffs, issues with comments) +- **50-100**: For simpler list operations (commits, branches, labels) +- **1-5**: For exploratory queries or schema discovery + +#### 3. Handle Pagination Loops + +When you need all results: + +```bash +# Step 1: Fetch first page +result=$(list_pull_requests --perPage 20 --page 1) + +# Step 2: Check if more pages exist +# Most list operations return metadata about total count or next page + +# Step 3: Fetch subsequent pages if needed +result=$(list_pull_requests --perPage 20 --page 2) +``` + +### Tool-Specific Guidance + +#### Pull Requests + +```bash +# Fetch recent PRs in small batches +list_pull_requests --state all --perPage 10 --sort updated --direction desc + +# Get PR details without full diff/comments +pull_request_read --method get --pullNumber 123 + +# Get PR files separately if needed +pull_request_read --method get_files --pullNumber 123 --perPage 30 +``` + +#### Issues + +```bash +# List issues with pagination +list_issues --perPage 20 --page 1 + +# Get issue comments in batches +issue_read --method get_comments --issue_number 123 --perPage 20 +``` + +#### Code Search + +```bash +# Search with limited results 
+search_code --query "function language:go" --perPage 10 +``` + +### Error Messages to Watch For + +If you see these errors, add pagination: + +- `MCP tool "list_pull_requests" response (75897 tokens) exceeds maximum allowed tokens (25000)` +- `MCP tool "pull_request_read" response (31675 tokens) exceeds maximum allowed tokens (25000)` +- `Response too large for tool [tool_name]` + +### Performance Tips + +1. **Start small**: Use `perPage: 10` initially, increase if needed +2. **Fetch incrementally**: Get overview first, then details for specific items +3. **Avoid wildcards**: Don't fetch all data when you need specific items +4. **Use filters**: Combine `perPage` with state/label/date filters to reduce results + +### Example Workflow Pattern + +```markdown +# Analyze Recent Pull Requests + +1. Fetch 10 most recent PRs (stay under token limit) +2. For each PR, get summary without full diff +3. If detailed analysis needed, fetch files for specific PR separately +4. Process results incrementally rather than loading everything at once +``` + +This proactive approach eliminates retry loops and reduces token consumption. 
diff --git a/.github/workflows/smoke-claude.lock.yml b/.github/workflows/smoke-claude.lock.yml index b845fce813..5d07042010 100644 --- a/.github/workflows/smoke-claude.lock.yml +++ b/.github/workflows/smoke-claude.lock.yml @@ -3,6 +3,10 @@ # gh aw compile # For more information: https://github.com/githubnext/gh-aw/blob/main/.github/instructions/github-agentic-workflows.instructions.md # +# Resolved workflow manifest: +# Imports: +# - shared/mcp-pagination.md +# # Job Dependency Graph: # ```mermaid # graph LR @@ -1166,7 +1170,7 @@ jobs: "-e", "GITHUB_READ_ONLY=1", "-e", - "GITHUB_TOOLSETS=default", + "GITHUB_TOOLSETS=repos,pull_requests", "ghcr.io/github/github-mcp-server:v0.20.1" ], "env": { @@ -1196,6 +1200,117 @@ jobs: PROMPT_DIR="$(dirname "$GH_AW_PROMPT")" mkdir -p "$PROMPT_DIR" cat > "$GH_AW_PROMPT" << 'PROMPT_EOF' + ## MCP Response Size Limits + + MCP tool responses have a **25,000 token limit**. When GitHub API responses exceed this limit, workflows must retry with pagination parameters, wasting turns and tokens. + + ### Common Scenarios + + **Problem**: Fetching large result sets without pagination + - `list_pull_requests` with many PRs (75,897 tokens in one case) + - `pull_request_read` with large diff/comments (31,675 tokens observed) + - `search_issues`, `search_code` with many results + + **Solution**: Use proactive pagination to stay under token limits + + ### Pagination Best Practices + + #### 1. Use `perPage` Parameter + + Limit results per request to prevent oversized responses: + + ```bash + # Good: Fetch PRs in small batches + list_pull_requests --perPage 10 + + # Good: Get issue with limited comments + issue_read --method get_comments --perPage 20 + + # Bad: Default pagination may return too much data + list_pull_requests # May exceed 25k tokens + ``` + + #### 2. 
Common `perPage` Values + + - **10-20**: For detailed items (PRs with diffs, issues with comments) + - **50-100**: For simpler list operations (commits, branches, labels) + - **1-5**: For exploratory queries or schema discovery + + #### 3. Handle Pagination Loops + + When you need all results: + + ```bash + # Step 1: Fetch first page + result=$(list_pull_requests --perPage 20 --page 1) + + # Step 2: Check if more pages exist + # Most list operations return metadata about total count or next page + + # Step 3: Fetch subsequent pages if needed + result=$(list_pull_requests --perPage 20 --page 2) + ``` + + ### Tool-Specific Guidance + + #### Pull Requests + + ```bash + # Fetch recent PRs in small batches + list_pull_requests --state all --perPage 10 --sort updated --direction desc + + # Get PR details without full diff/comments + pull_request_read --method get --pullNumber 123 + + # Get PR files separately if needed + pull_request_read --method get_files --pullNumber 123 --perPage 30 + ``` + + #### Issues + + ```bash + # List issues with pagination + list_issues --perPage 20 --page 1 + + # Get issue comments in batches + issue_read --method get_comments --issue_number 123 --perPage 20 + ``` + + #### Code Search + + ```bash + # Search with limited results + search_code --query "function language:go" --perPage 10 + ``` + + ### Error Messages to Watch For + + If you see these errors, add pagination: + + - `MCP tool "list_pull_requests" response (75897 tokens) exceeds maximum allowed tokens (25000)` + - `MCP tool "pull_request_read" response (31675 tokens) exceeds maximum allowed tokens (25000)` + - `Response too large for tool [tool_name]` + + ### Performance Tips + + 1. **Start small**: Use `perPage: 10` initially, increase if needed + 2. **Fetch incrementally**: Get overview first, then details for specific items + 3. **Avoid wildcards**: Don't fetch all data when you need specific items + 4. 
**Use filters**: Combine `perPage` with state/label/date filters to reduce results + + ### Example Workflow Pattern + + ```markdown + # Analyze Recent Pull Requests + + 1. Fetch 10 most recent PRs (stay under token limit) + 2. For each PR, get summary without full diff + 3. If detailed analysis needed, fetch files for specific PR separately + 4. Process results incrementally rather than loading everything at once + ``` + + This proactive approach eliminates retry loops and reduces token consumption. + Review the last 2 merged pull requests in this repository and post summary in an issue. PROMPT_EOF @@ -1466,7 +1581,7 @@ jobs: run: | set -o pipefail # Execute Claude Code CLI with prompt from file - claude --print --mcp-config /tmp/gh-aw/mcp-config/mcp-servers.json --allowed-tools ExitPlanMode,Glob,Grep,LS,NotebookRead,Read,Task,TodoWrite,Write,mcp__github__download_workflow_run_artifact,mcp__github__get_code_scanning_alert,mcp__github__get_commit,mcp__github__get_dependabot_alert,mcp__github__get_discussion,mcp__github__get_discussion_comments,mcp__github__get_file_contents,mcp__github__get_job_logs,mcp__github__get_label,mcp__github__get_latest_release,mcp__github__get_me,mcp__github__get_notification_details,mcp__github__get_pull_request,mcp__github__get_pull_request_comments,mcp__github__get_pull_request_diff,mcp__github__get_pull_request_files,mcp__github__get_pull_request_review_comments,mcp__github__get_pull_request_reviews,mcp__github__get_pull_request_status,mcp__github__get_release_by_tag,mcp__github__get_secret_scanning_alert,mcp__github__get_tag,mcp__github__get_workflow_run,mcp__github__get_workflow_run_logs,mcp__github__get_workflow_run_usage,mcp__github__issue_read,mcp__github__list_branches,mcp__github__list_code_scanning_alerts,mcp__github__list_commits,mcp__github__list_dependabot_alerts,mcp__github__list_discussion_categories,mcp__github__list_discussions,mcp__github__list_issue_types,mcp__github__list_issues,mcp__github__list_label,mcp__github__lis
t_notifications,mcp__github__list_pull_requests,mcp__github__list_releases,mcp__github__list_secret_scanning_alerts,mcp__github__list_starred_repositories,mcp__github__list_tags,mcp__github__list_workflow_jobs,mcp__github__list_workflow_run_artifacts,mcp__github__list_workflow_runs,mcp__github__list_workflows,mcp__github__pull_request_read,mcp__github__search_code,mcp__github__search_issues,mcp__github__search_orgs,mcp__github__search_pull_requests,mcp__github__search_repositories,mcp__github__search_users --debug --verbose --permission-mode bypassPermissions --output-format stream-json --settings /tmp/gh-aw/.claude/settings.json "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)" 2>&1 | tee /tmp/gh-aw/agent-stdio.log + claude --print --max-turns 15 --mcp-config /tmp/gh-aw/mcp-config/mcp-servers.json --allowed-tools ExitPlanMode,Glob,Grep,LS,NotebookRead,Read,Task,TodoWrite,Write,mcp__github__download_workflow_run_artifact,mcp__github__get_code_scanning_alert,mcp__github__get_commit,mcp__github__get_dependabot_alert,mcp__github__get_discussion,mcp__github__get_discussion_comments,mcp__github__get_file_contents,mcp__github__get_job_logs,mcp__github__get_label,mcp__github__get_latest_release,mcp__github__get_me,mcp__github__get_notification_details,mcp__github__get_pull_request,mcp__github__get_pull_request_comments,mcp__github__get_pull_request_diff,mcp__github__get_pull_request_files,mcp__github__get_pull_request_review_comments,mcp__github__get_pull_request_reviews,mcp__github__get_pull_request_status,mcp__github__get_release_by_tag,mcp__github__get_secret_scanning_alert,mcp__github__get_tag,mcp__github__get_workflow_run,mcp__github__get_workflow_run_logs,mcp__github__get_workflow_run_usage,mcp__github__issue_read,mcp__github__list_branches,mcp__github__list_code_scanning_alerts,mcp__github__list_commits,mcp__github__list_dependabot_alerts,mcp__github__list_discussion_categories,mcp__github__list_discussions,mcp__github__list_issue_types,mcp__github__list_issues,mcp__github_
_list_label,mcp__github__list_notifications,mcp__github__list_pull_requests,mcp__github__list_releases,mcp__github__list_secret_scanning_alerts,mcp__github__list_starred_repositories,mcp__github__list_tags,mcp__github__list_workflow_jobs,mcp__github__list_workflow_run_artifacts,mcp__github__list_workflow_runs,mcp__github__list_workflows,mcp__github__pull_request_read,mcp__github__search_code,mcp__github__search_issues,mcp__github__search_orgs,mcp__github__search_pull_requests,mcp__github__search_repositories,mcp__github__search_users --debug --verbose --permission-mode bypassPermissions --output-format stream-json --settings /tmp/gh-aw/.claude/settings.json "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)" 2>&1 | tee /tmp/gh-aw/agent-stdio.log env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} @@ -1481,6 +1596,7 @@ jobs: BASH_MAX_TIMEOUT_MS: "60000" GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }} GH_AW_SAFE_OUTPUTS_STAGED: "true" + GH_AW_MAX_TURNS: 15 - name: Clean up network proxy hook files if: always() run: | @@ -3684,7 +3800,7 @@ jobs: run: | set -o pipefail # Execute Claude Code CLI with prompt from file - claude --print --allowed-tools 'Bash(cat),Bash(grep),Bash(head),Bash(jq),Bash(ls),Bash(tail),Bash(wc),BashOutput,ExitPlanMode,Glob,Grep,KillBash,LS,NotebookRead,Read,Task,TodoWrite' --debug --verbose --permission-mode bypassPermissions --output-format stream-json "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)" 2>&1 | tee /tmp/gh-aw/threat-detection/detection.log + claude --print --max-turns 15 --allowed-tools 'Bash(cat),Bash(grep),Bash(head),Bash(jq),Bash(ls),Bash(tail),Bash(wc),BashOutput,ExitPlanMode,Glob,Grep,KillBash,LS,NotebookRead,Read,Task,TodoWrite' --debug --verbose --permission-mode bypassPermissions --output-format stream-json "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)" 2>&1 | tee /tmp/gh-aw/threat-detection/detection.log env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} 
CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} @@ -3696,6 +3812,7 @@ jobs: MCP_TOOL_TIMEOUT: "60000" BASH_DEFAULT_TIMEOUT_MS: "60000" BASH_MAX_TIMEOUT_MS: "60000" + GH_AW_MAX_TURNS: 15 - name: Parse threat detection results uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd with: diff --git a/.github/workflows/smoke-claude.md b/.github/workflows/smoke-claude.md index cf1b70419f..1a656cec3a 100644 --- a/.github/workflows/smoke-claude.md +++ b/.github/workflows/smoke-claude.md @@ -12,9 +12,14 @@ permissions: pull-requests: read name: Smoke Claude -engine: claude +engine: + id: claude + max-turns: 15 +imports: + - shared/mcp-pagination.md tools: github: + toolsets: [repos, pull_requests] safe-outputs: staged: true create-issue: