diff --git a/.github/workflows/copilot-session-insights.lock.yml b/.github/workflows/copilot-session-insights.lock.yml index 7b2ed682aea..7fc0dc4e167 100644 --- a/.github/workflows/copilot-session-insights.lock.yml +++ b/.github/workflows/copilot-session-insights.lock.yml @@ -31,7 +31,7 @@ # - shared/session-analysis-charts.md # - shared/session-analysis-strategies.md # -# frontmatter-hash: 61ef9ea61ea5ac6b3eab6d60fdb1d1a5915e0fce54f0cff180f09cb16c66eae8 +# frontmatter-hash: 1b2864370d65c60ed768cb2e794d2b20256ec84f3ce347aca33dbbd2e8032bdd name: "Copilot Session Insights" "on": @@ -126,7 +126,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} name: Fetch Copilot session data - run: "# Create output directories\nmkdir -p /tmp/gh-aw/session-data\nmkdir -p /tmp/gh-aw/session-data/logs\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" ]; then\n echo \"✓ Found cached session data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" /tmp/gh-aw/session-data/sessions-list.json\n \n # Regenerate schema if missing\n if [ ! 
-f \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/session-data/sessions-list.json > \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\" /tmp/gh-aw/session-data/sessions-schema.json\n \n # Restore cached log files if they exist\n if [ -d \"$CACHE_DIR/session-logs-${TODAY}\" ]; then\n echo \"✓ Found cached session logs from ${TODAY}\"\n cp -r \"$CACHE_DIR/session-logs-${TODAY}\"/* /tmp/gh-aw/session-data/logs/ 2>/dev/null || true\n echo \"Restored $(find /tmp/gh-aw/session-data/logs -type f | wc -l) session log files from cache\"\n fi\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total sessions in cache: $(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\"\nelse\n echo \"⬇ Downloading fresh session data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for workflow runs from copilot/* branches\n # This fetches GitHub Copilot agent task runs by searching for workflow runs on copilot/* branches\n echo \"Fetching Copilot agent workflow runs from the last 30 days...\"\n \n # Get workflow runs from copilot/* branches\n gh api \"repos/${{ github.repository }}/actions/runs\" \\\n --paginate \\\n --jq \".workflow_runs[] | select(.head_branch | startswith(\\\"copilot/\\\")) | select(.created_at >= \\\"${DATE_30_DAYS_AGO}\\\") | {id, name, head_branch, created_at, updated_at, status, conclusion, html_url}\" \\\n | jq -s '.[0:50]' \\\n > /tmp/gh-aw/session-data/sessions-list.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/session-data/sessions-list.json > /tmp/gh-aw/session-data/sessions-schema.json\n\n # Download logs for each workflow run (limit to first 50)\n SESSION_COUNT=$(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\n echo \"Downloading logs for $SESSION_COUNT sessions...\"\n \n jq -r '.[].id' 
/tmp/gh-aw/session-data/sessions-list.json | while read -r run_id; do\n if [ -n \"$run_id\" ]; then\n echo \"Downloading logs for run: $run_id\"\n # Download workflow run logs using GitHub API\n gh api \"repos/${{ github.repository }}/actions/runs/${run_id}/logs\" \\\n > \"/tmp/gh-aw/session-data/logs/${run_id}.zip\" 2>&1 || true\n \n # Extract the logs if download succeeded\n if [ -f \"/tmp/gh-aw/session-data/logs/${run_id}.zip\" ] && [ -s \"/tmp/gh-aw/session-data/logs/${run_id}.zip\" ]; then\n unzip -q \"/tmp/gh-aw/session-data/logs/${run_id}.zip\" -d \"/tmp/gh-aw/session-data/logs/${run_id}/\" 2>/dev/null || true\n rm \"/tmp/gh-aw/session-data/logs/${run_id}.zip\"\n fi\n fi\n done\n \n LOG_COUNT=$(find /tmp/gh-aw/session-data/logs/ -type d -mindepth 1 | wc -l)\n echo \"Session logs downloaded: $LOG_COUNT log directories\"\n\n # Store in cache with today's date\n cp /tmp/gh-aw/session-data/sessions-list.json \"$CACHE_DIR/copilot-sessions-${TODAY}.json\"\n cp /tmp/gh-aw/session-data/sessions-schema.json \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\"\n \n # Cache the log files\n mkdir -p \"$CACHE_DIR/session-logs-${TODAY}\"\n cp -r /tmp/gh-aw/session-data/logs/* \"$CACHE_DIR/session-logs-${TODAY}/\" 2>/dev/null || true\n\n echo \"✓ Session data saved to cache: copilot-sessions-${TODAY}.json\"\n echo \"Total sessions found: $(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"Session data available at: /tmp/gh-aw/session-data/sessions-list.json\"\necho \"Schema available at: /tmp/gh-aw/session-data/sessions-schema.json\"\necho \"Logs available at: /tmp/gh-aw/session-data/logs/\"\n\n# Set outputs for downstream use\necho \"sessions_count=$(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\" >> \"$GITHUB_OUTPUT\"" + run: "# Create output directories\nmkdir -p /tmp/gh-aw/session-data\nmkdir -p /tmp/gh-aw/session-data/logs\nmkdir -p 
/tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" ]; then\n echo \"✓ Found cached session data from ${TODAY}\"\n cp \"$CACHE_DIR/copilot-sessions-${TODAY}.json\" /tmp/gh-aw/session-data/sessions-list.json\n \n # Regenerate schema if missing\n if [ ! -f \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/session-data/sessions-list.json > \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\" /tmp/gh-aw/session-data/sessions-schema.json\n \n # Restore cached log files if they exist\n if [ -d \"$CACHE_DIR/session-logs-${TODAY}\" ]; then\n echo \"✓ Found cached session logs from ${TODAY}\"\n cp -r \"$CACHE_DIR/session-logs-${TODAY}\"/* /tmp/gh-aw/session-data/logs/ 2>/dev/null || true\n echo \"Restored $(find /tmp/gh-aw/session-data/logs -type f | wc -l) session log files from cache\"\n fi\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total sessions in cache: $(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\"\nelse\n echo \"⬇ Downloading fresh session data...\"\n \n # Calculate date 30 days ago\n DATE_30_DAYS_AGO=$(date -d '30 days ago' '+%Y-%m-%d' 2>/dev/null || date -v-30d '+%Y-%m-%d')\n\n # Search for workflow runs from copilot/* branches\n # This fetches GitHub Copilot agent task runs by searching for workflow runs on copilot/* branches\n echo \"Fetching Copilot agent workflow runs from the last 30 days...\"\n \n # Get workflow runs from copilot/* branches\n gh api \"repos/${{ github.repository }}/actions/runs\" \\\n --paginate \\\n --jq \".workflow_runs[] | select(.head_branch | startswith(\\\"copilot/\\\")) | select(.created_at >= \\\"${DATE_30_DAYS_AGO}\\\") | {id, name, head_branch, created_at, 
updated_at, status, conclusion, html_url}\" \\\n | jq -s '.[0:50]' \\\n > /tmp/gh-aw/session-data/sessions-list.json\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/session-data/sessions-list.json > /tmp/gh-aw/session-data/sessions-schema.json\n\n # Download conversation logs using gh agent-task command (limit to first 50)\n SESSION_COUNT=$(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\n echo \"Downloading conversation logs for $SESSION_COUNT sessions...\"\n \n # Use gh agent-task to fetch session logs with conversation transcripts\n # Extract session numbers from head_branch (format: copilot/issue-123 or copilot/task-456)\n # The number is the issue/task/PR number that the gh agent-task command uses\n jq -r '[.[].head_branch] | unique | .[]' /tmp/gh-aw/session-data/sessions-list.json | while read -r branch; do\n if [ -n \"$branch\" ]; then\n # Extract number from branch name (e.g., copilot/issue-123 -> 123)\n # NOTE(review): every digit in the name is kept (copilot/v2-fix-7 -> 27); confirm branch naming\n session_number=$(echo \"$branch\" | sed 's/copilot\\///' | sed 's/[^0-9]//g')\n \n if [ -n \"$session_number\" ]; then\n echo \"Downloading conversation log for session #$session_number (branch: $branch)\"\n \n # Use gh agent-task view --log to get conversation transcript\n # This contains the agent's internal monologue, tool calls, and reasoning\n gh agent-task view --repo \"${{ github.repository }}\" \"$session_number\" --log \\\n > \"/tmp/gh-aw/session-data/logs/${session_number}-conversation.txt\" 2>&1 || {\n echo \"Warning: Could not fetch conversation log for session #$session_number\"\n # Discard the captured error output (it is not a transcript), then fall back to the newest Actions run for this branch\n rm -f \"/tmp/gh-aw/session-data/logs/${session_number}-conversation.txt\"\n run_id=$(jq -r --arg branch \"$branch\" '[.[] | select(.head_branch == $branch) | .id] | first // empty' /tmp/gh-aw/session-data/sessions-list.json)\n if [ -n \"$run_id\" ]; then\n echo \"Falling back to GitHub Actions logs for run ID: $run_id\"\n gh api \"repos/${{ github.repository }}/actions/runs/${run_id}/logs\" \\\n > \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" 2>&1 || true\n \n if [ -f \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" ] && [ -s \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" ]; then\n unzip -q \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" -d \"/tmp/gh-aw/session-data/logs/${session_number}/\" 2>/dev/null || true\n fi\n rm -f \"/tmp/gh-aw/session-data/logs/${session_number}-actions.zip\" # also clears an empty archive left by a failed download\n fi\n }\n fi\n fi\n done\n \n LOG_COUNT=$(find /tmp/gh-aw/session-data/logs/ -type f -name \"*-conversation.txt\" | wc -l)\n echo \"Conversation logs downloaded: $LOG_COUNT session logs\"\n \n FALLBACK_COUNT=$(find /tmp/gh-aw/session-data/logs/ -mindepth 1 -maxdepth 1 -type d | wc -l)\n if [ \"$FALLBACK_COUNT\" -gt 0 ]; then\n echo \"Fallback GitHub Actions logs: $FALLBACK_COUNT sessions\"\n fi\n\n # Store in cache with today's date\n cp /tmp/gh-aw/session-data/sessions-list.json \"$CACHE_DIR/copilot-sessions-${TODAY}.json\"\n cp /tmp/gh-aw/session-data/sessions-schema.json \"$CACHE_DIR/copilot-sessions-${TODAY}-schema.json\"\n \n # Cache the log files\n mkdir -p \"$CACHE_DIR/session-logs-${TODAY}\"\n cp -r /tmp/gh-aw/session-data/logs/* \"$CACHE_DIR/session-logs-${TODAY}/\" 2>/dev/null || true\n\n echo \"✓ Session data saved to cache: copilot-sessions-${TODAY}.json\"\n echo \"Total sessions found: $(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"Session data available at: /tmp/gh-aw/session-data/sessions-list.json\"\necho \"Schema available at: /tmp/gh-aw/session-data/sessions-schema.json\"\necho \"Logs available at: /tmp/gh-aw/session-data/logs/\"\n\n# Set outputs for downstream use\necho \"sessions_count=$(jq 'length' /tmp/gh-aw/session-data/sessions-list.json)\" >> \"$GITHUB_OUTPUT\"" - name: Setup Python environment run: "# Create working directory for Python 
scripts\nmkdir -p /tmp/gh-aw/python\nmkdir -p /tmp/gh-aw/python/data\nmkdir -p /tmp/gh-aw/python/charts\nmkdir -p /tmp/gh-aw/python/artifacts\n\necho \"Python environment setup complete\"\necho \"Working directory: /tmp/gh-aw/python\"\necho \"Data directory: /tmp/gh-aw/python/data\"\necho \"Charts directory: /tmp/gh-aw/python/charts\"\necho \"Artifacts directory: /tmp/gh-aw/python/artifacts\"\n" - name: Install Python scientific libraries diff --git a/.github/workflows/copilot-session-insights.md b/.github/workflows/copilot-session-insights.md index 43475aaa60b..7b9ef8b41cb 100644 --- a/.github/workflows/copilot-session-insights.md +++ b/.github/workflows/copilot-session-insights.md @@ -70,6 +70,8 @@ Analyze approximately 50 Copilot agent sessions to identify: - Prompt quality indicators - Opportunities for improvement +**NEW**: This workflow now has access to actual agent conversation transcripts (not just infrastructure logs), enabling true behavioral analysis through the agent's internal monologue and reasoning process. + Create a comprehensive report and publish it as a GitHub Discussion for team review. ## Current Context @@ -78,6 +80,7 @@ Create a comprehensive report and publish it as a GitHub Discussion for team rev - **Analysis Period**: Most recent ~50 agent sessions - **Cache Memory**: `/tmp/gh-aw/cache-memory/` - **Pre-fetched Data**: Available at `/tmp/gh-aw/session-data/` +- **Conversation Logs**: Now available with agent's internal monologue and reasoning ## Task Overview @@ -85,22 +88,50 @@ Create a comprehensive report and publish it as a GitHub Discussion for team rev **Pre-fetched Data Available**: Session data has been fetched by the `copilot-session-data-fetch` shared module: - `/tmp/gh-aw/session-data/sessions-list.json` - List of sessions with metadata -- `/tmp/gh-aw/session-data/logs/` - Individual session log files +- `/tmp/gh-aw/session-data/logs/` - **Conversation transcript files** (new!) 
+ - `{session_number}-conversation.txt` - Agent's internal monologue, reasoning, and tool usage + - `{session_number}/` - GitHub Actions logs (fallback only) + +**What's in the Conversation Logs**: +- Agent's step-by-step reasoning and planning +- Internal monologue showing decision-making process +- Tool calls and their outputs +- Code changes and validation attempts +- Error handling and recovery strategies **Verify Setup**: 1. Confirm session data was downloaded successfully -2. Initialize or restore cache-memory from `/tmp/gh-aw/cache-memory/` -3. Load historical analysis data if available +2. Check that conversation logs are available (primary source) +3. Initialize or restore cache-memory from `/tmp/gh-aw/cache-memory/` +4. Load historical analysis data if available ### Phase 1: Session Analysis -For each downloaded session log in `/tmp/gh-aw/session-data/logs/`: +For each downloaded session in `/tmp/gh-aw/session-data/logs/`: + +1. **Load Conversation Logs**: Read the agent's conversation transcript from `{session_number}-conversation.txt` files (when only the fallback `{session_number}/` directory exists, use the GitHub Actions logs in it instead). These contain: + - Agent's internal reasoning and planning + - Tool usage and results + - Code changes and validation steps + - Error recovery attempts -1. **Load Historical Context**: Check cache memory for previous analysis results, known strategies, and identified patterns (see `session-analysis-strategies` shared module) +2. **Load Historical Context**: Check cache memory for previous analysis results, known strategies, and identified patterns (see `session-analysis-strategies` shared module) -2. **Apply Analysis Strategies**: Use the standard and experimental strategies defined in the imported `session-analysis-strategies` module +3. **Apply Analysis Strategies**: Use the standard and experimental strategies defined in the imported `session-analysis-strategies` module -3. **Collect Session Data**: Gather metrics for each session as defined in the shared module +4. 
**Extract Behavioral Insights**: From the conversation logs, identify: + - **Reasoning patterns**: How does the agent approach problems? + - **Tool usage effectiveness**: Which tools are used and how successful are they? + - **Error recovery**: How does the agent handle and recover from errors? + - **Planning quality**: Does the agent plan before acting or iterate randomly? + - **Prompt understanding**: Does the agent correctly interpret the user's request? + +5. **Collect Session Metrics**: Gather metrics for each session: + - Session duration and completion status + - Number of tool calls and types + - Error count and recovery success + - Code quality indicators from the conversation + - Prompt clarity assessment based on agent's understanding ### Phase 2: Generate Trend Charts @@ -367,6 +398,36 @@ _Workflow: ${{ github.workflow }}_ - **Sanitization**: Redact any sensitive information from examples - **Validation**: Verify all data before analysis - **Safe Processing**: Never execute code from sessions +- **Conversation Log Analysis**: Analyze the agent's reasoning and tool usage patterns, but always sanitize examples before including in reports + +### Working with Conversation Logs + +**Accessing Logs**: +```bash +# List available conversation logs +find /tmp/gh-aw/session-data/logs -type f -name "*-conversation.txt" + +# Read a specific conversation log +cat /tmp/gh-aw/session-data/logs/123-conversation.txt + +# Count conversation logs +find /tmp/gh-aw/session-data/logs -type f -name "*-conversation.txt" | wc -l +``` + +**What to Look For in Conversation Logs**: +1. **Agent's Planning**: Does the agent plan before acting? +2. **Tool Selection**: Which tools does the agent choose and why? +3. **Error Handling**: How does the agent respond to errors? +4. **Code Quality**: Does the agent validate its changes? +5. **Prompt Understanding**: Does the agent correctly interpret the task? +6. **Iteration Patterns**: Does the agent get stuck in loops? 
+ +**Analysis Patterns**: +- Look for repeated phrases indicating confusion or loops +- Identify successful tool usage patterns +- Track error recovery strategies +- Measure clarity of agent's reasoning +- Assess quality of code changes from the log commentary ### Analysis Quality diff --git a/.github/workflows/shared/copilot-session-data-fetch.md b/.github/workflows/shared/copilot-session-data-fetch.md index 13a12249b29..83e120c35c2 100644 --- a/.github/workflows/shared/copilot-session-data-fetch.md +++ b/.github/workflows/shared/copilot-session-data-fetch.md @@ -17,6 +17,7 @@ tools: key: copilot-session-data bash: - "gh api *" + - "gh agent-task *" - "jq *" - "/tmp/gh-aw/jqschema.sh" - "mkdir *" @@ -25,6 +26,7 @@ tools: - "unzip *" - "find *" - "rm *" + - "cat *" steps: - name: Fetch Copilot session data @@ -81,27 +83,52 @@ steps: # Generate schema for reference /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/session-data/sessions-list.json > /tmp/gh-aw/session-data/sessions-schema.json - # Download logs for each workflow run (limit to first 50) + # Download conversation logs using gh agent-task command (limit to first 50) SESSION_COUNT=$(jq 'length' /tmp/gh-aw/session-data/sessions-list.json) - echo "Downloading logs for $SESSION_COUNT sessions..." + echo "Downloading conversation logs for $SESSION_COUNT sessions..." 
- jq -r '.[].id' /tmp/gh-aw/session-data/sessions-list.json | while read -r run_id; do - if [ -n "$run_id" ]; then - echo "Downloading logs for run: $run_id" - # Download workflow run logs using GitHub API - gh api "repos/${{ github.repository }}/actions/runs/${run_id}/logs" \ - > "/tmp/gh-aw/session-data/logs/${run_id}.zip" 2>&1 || true + # Use gh agent-task to fetch session logs with conversation transcripts + # Extract session numbers from head_branch (format: copilot/issue-123 or copilot/task-456) + # The number is the issue/task/PR number that the gh agent-task command uses + jq -r '.[].head_branch' /tmp/gh-aw/session-data/sessions-list.json | while read -r branch; do + if [ -n "$branch" ]; then + # Extract number from branch name (e.g., copilot/issue-123 -> 123) + # This is the session identifier used by gh agent-task + session_number=$(echo "$branch" | sed 's/copilot\///' | sed 's/[^0-9]//g') - # Extract the logs if download succeeded - if [ -f "/tmp/gh-aw/session-data/logs/${run_id}.zip" ] && [ -s "/tmp/gh-aw/session-data/logs/${run_id}.zip" ]; then - unzip -q "/tmp/gh-aw/session-data/logs/${run_id}.zip" -d "/tmp/gh-aw/session-data/logs/${run_id}/" 2>/dev/null || true - rm "/tmp/gh-aw/session-data/logs/${run_id}.zip" + if [ -n "$session_number" ]; then + echo "Downloading conversation log for session #$session_number (branch: $branch)" + + # Use gh agent-task view --log to get conversation transcript + # This contains the agent's internal monologue, tool calls, and reasoning + gh agent-task view --repo "${{ github.repository }}" "$session_number" --log \ + > "/tmp/gh-aw/session-data/logs/${session_number}-conversation.txt" 2>&1 || { + echo "Warning: Could not fetch conversation log for session #$session_number" + # If gh agent-task fails, fall back to downloading GitHub Actions logs + # This ensures we have some data even if agent-task command is unavailable + run_id=$(jq -r ".[] | select(.head_branch == \"$branch\") | .id" 
/tmp/gh-aw/session-data/sessions-list.json) + if [ -n "$run_id" ]; then + echo "Falling back to GitHub Actions logs for run ID: $run_id" + gh api "repos/${{ github.repository }}/actions/runs/${run_id}/logs" \ + > "/tmp/gh-aw/session-data/logs/${session_number}-actions.zip" 2>&1 || true + + if [ -f "/tmp/gh-aw/session-data/logs/${session_number}-actions.zip" ] && [ -s "/tmp/gh-aw/session-data/logs/${session_number}-actions.zip" ]; then + unzip -q "/tmp/gh-aw/session-data/logs/${session_number}-actions.zip" -d "/tmp/gh-aw/session-data/logs/${session_number}/" 2>/dev/null || true + rm "/tmp/gh-aw/session-data/logs/${session_number}-actions.zip" + fi + fi + } fi fi done - LOG_COUNT=$(find /tmp/gh-aw/session-data/logs/ -type d -mindepth 1 | wc -l) - echo "Session logs downloaded: $LOG_COUNT log directories" + LOG_COUNT=$(find /tmp/gh-aw/session-data/logs/ -type f -name "*-conversation.txt" | wc -l) + echo "Conversation logs downloaded: $LOG_COUNT session logs" + + FALLBACK_COUNT=$(find /tmp/gh-aw/session-data/logs/ -type d -mindepth 1 | wc -l) + if [ "$FALLBACK_COUNT" -gt 0 ]; then + echo "Fallback GitHub Actions logs: $FALLBACK_COUNT sessions" + fi # Store in cache with today's date cp /tmp/gh-aw/session-data/sessions-list.json "$CACHE_DIR/copilot-sessions-${TODAY}.json" @@ -140,12 +167,20 @@ This shared component fetches GitHub Copilot agent session data by analyzing wor 4. If cache doesn't exist: - Calculates the date 30 days ago (cross-platform compatible) - Fetches all workflow runs from branches starting with `copilot/` using GitHub API - - Downloads logs for up to 50 most recent runs - - Extracts and organizes log files + - **Downloads conversation logs** using `gh agent-task view --log` for up to 50 most recent sessions + - Falls back to GitHub Actions logs if agent-task command fails - Saves data to cache with date-based filename (e.g., `copilot-sessions-2024-11-22.json`) - Copies data to working directory for use 5. 
Generates a schema of the data structure +### What's New: Conversation Transcript Access + +**This module now fetches actual agent conversation logs** instead of just infrastructure logs: +- Uses `gh agent-task view --log` to access agent session logs +- Logs include agent's internal monologue, reasoning, and tool usage +- Enables true behavioral pattern analysis and prompt quality assessment +- Falls back to GitHub Actions logs if agent-task command is unavailable + ### Caching Strategy - **Cache Key**: `copilot-session-data` for workflow-level sharing @@ -155,13 +190,15 @@ This shared component fetches GitHub Copilot agent session data by analyzing wor - Multiple workflows running on the same day share the same session data - Reduces GitHub API rate limit usage - Faster workflow execution after first fetch of the day - - Avoids need for `gh agent-task` extension + - Includes conversation transcript cache ### Output Files - **`/tmp/gh-aw/session-data/sessions-list.json`**: Full session data including run ID, name, branch, timestamps, status, conclusion, and URL - **`/tmp/gh-aw/session-data/sessions-schema.json`**: JSON schema showing the structure of the session data -- **`/tmp/gh-aw/session-data/logs/`**: Directory containing extracted workflow run logs +- **`/tmp/gh-aw/session-data/logs/`**: Directory containing session conversation logs + - **`{session_number}-conversation.txt`**: Agent conversation transcript with internal monologue and tool usage (primary) + - **`{session_number}/`**: GitHub Actions infrastructure logs (fallback only) - **`/tmp/gh-aw/cache-memory/copilot-sessions-YYYY-MM-DD.json`**: Cached session data with date - **`/tmp/gh-aw/cache-memory/copilot-sessions-YYYY-MM-DD-schema.json`**: Cached schema with date - **`/tmp/gh-aw/cache-memory/session-logs-YYYY-MM-DD/`**: Cached log files with date @@ -187,29 +224,46 @@ jq --arg today "$TODAY" '[.[] | select(.created_at >= $today)]' /tmp/gh-aw/sessi # Count total sessions jq 'length' 
/tmp/gh-aw/session-data/sessions-list.json -# Get run IDs -jq '[.[].id]' /tmp/gh-aw/session-data/sessions-list.json +# Get session numbers for conversation logs +jq -r '.[].head_branch' /tmp/gh-aw/session-data/sessions-list.json | sed 's/copilot\///' | sed 's/[^0-9]//g' -# List log directories -find /tmp/gh-aw/session-data/logs -type d -mindepth 1 +# List conversation log files +find /tmp/gh-aw/session-data/logs -type f -name "*-conversation.txt" + +# Read a specific conversation log (session number 123) +cat /tmp/gh-aw/session-data/logs/123-conversation.txt ``` ### Requirements - Automatically imports `jqschema.md` for schema generation (via transitive import closure) - Uses GitHub Actions API to fetch workflow runs from `copilot/*` branches +- **Uses `gh agent-task view --log` to fetch conversation transcripts** (requires gh CLI v2.80.0+) - Cross-platform date calculation (works on both GNU and BSD date commands) - Cache-memory tool is automatically configured for data persistence +- Falls back to GitHub Actions infrastructure logs if `gh agent-task` is unavailable ### Why Branch-Based Search? -GitHub Copilot creates branches with the `copilot/` prefix, making branch-based workflow run search a reliable way to identify Copilot agent sessions without requiring the `gh agent-task` extension. +GitHub Copilot creates branches with the `copilot/` prefix, making branch-based workflow run search a reliable way to identify Copilot agent sessions. 
+ +### Conversation Log Access + +This module now provides access to **actual agent conversation transcripts** via the `gh agent-task view --log` command: -### Advantages Over gh agent-task Extension +**What's in the conversation logs:** +- Agent's internal monologue and reasoning +- Tool calls and their results +- Step-by-step problem-solving approach +- Code changes and validations +- Error handling and recovery attempts -- **No Extension Required**: Works without installing `gh agent-task` CLI extension -- **Better Caching**: Leverages cache-memory for efficient data reuse -- **API-Based**: Uses standard GitHub API endpoints accessible to all users +**Benefits for analysis:** +- True behavioral pattern analysis (not just infrastructure metrics) +- Prompt quality assessment based on actual responses +- Success factor identification from agent reasoning +- Failure signal detection from error patterns +- Tool usage effectiveness analysis - **Broader Access**: Works in all GitHub environments, not just Enterprise with Copilot ### Cache Behavior