diff --git a/.github/workflows/deep-report.lock.yml b/.github/workflows/deep-report.lock.yml index f4a660ee75a..9f492b54ee5 100644 --- a/.github/workflows/deep-report.lock.yml +++ b/.github/workflows/deep-report.lock.yml @@ -24,11 +24,12 @@ # # Resolved workflow manifest: # Imports: +# - shared/discussions-data-fetch.md # - shared/jqschema.md # - shared/reporting.md # - shared/weekly-issues-data-fetch.md # -# gh-aw-metadata: {"schema_version":"v2","frontmatter_hash":"a6d0e46b1953d18e70a4029c7369fb96273432fdd02f2f5ca890a750bcfeb2b1","strict":true} +# gh-aw-metadata: {"schema_version":"v2","frontmatter_hash":"e825d8f874d57f17d744dd11ea6872a00f1f534e8ffd596140fee31b85b550bd","strict":true} name: "DeepReport - Intelligence Gathering Agent" "on": @@ -182,6 +183,9 @@ jobs: {{#runtime-import .github/workflows/shared/jqschema.md}} GH_AW_PROMPT_EOF cat << 'GH_AW_PROMPT_EOF' + {{#runtime-import .github/workflows/shared/discussions-data-fetch.md}} + GH_AW_PROMPT_EOF + cat << 'GH_AW_PROMPT_EOF' {{#runtime-import .github/workflows/shared/weekly-issues-data-fetch.md}} GH_AW_PROMPT_EOF cat << 'GH_AW_PROMPT_EOF' @@ -356,6 +360,13 @@ jobs: GH_TOKEN: ${{ github.token }} - name: Setup jq utilities directory run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" + - env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO_NAME: ${{ github.event.repository.name }} + REPO_OWNER: ${{ github.repository_owner }} + name: Fetch discussions data + run: "# Create output directories\nmkdir -p /tmp/gh-aw/discussions-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/discussions-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/discussions-${TODAY}.json\" ]; then\n echo \"✓ Found cached discussions data from ${TODAY}\"\n cp \"$CACHE_DIR/discussions-${TODAY}.json\" /tmp/gh-aw/discussions-data/discussions.json\n \n # Regenerate schema if missing\n if [ ! -f \"$CACHE_DIR/discussions-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/discussions-data/discussions.json > \"$CACHE_DIR/discussions-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/discussions-${TODAY}-schema.json\" /tmp/gh-aw/discussions-data/discussions-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total discussions in cache: $(jq 'length' /tmp/gh-aw/discussions-data/discussions.json)\"\nelse\n echo \"⬇ Downloading fresh discussions data...\"\n \n # Fetch OPEN discussions using GraphQL with pagination (up to GH_AW_DISCUSSIONS_COUNT, default 100)\n DISCUSSIONS_FILE=\"/tmp/gh-aw/discussions-data/discussions.json\"\n echo '[]' > \"$DISCUSSIONS_FILE\"\n \n CURSOR=\"\"\n HAS_NEXT_PAGE=true\n PAGE_COUNT=0\n \n while [ \"$HAS_NEXT_PAGE\" = \"true\" ]; do\n if [ -z \"$CURSOR\" ]; then\n CURSOR_ARG=\"\"\n else\n CURSOR_ARG=\", after: \\\"$CURSOR\\\"\"\n fi\n \n RESULT=$(gh api graphql -f query=\"\n query {\n repository(owner: \\\"$REPO_OWNER\\\", name: \\\"$REPO_NAME\\\") {\n discussions(first: 100, states: [OPEN]${CURSOR_ARG}) {\n pageInfo {\n hasNextPage\n endCursor\n }\n nodes {\n number\n title\n body\n createdAt\n updatedAt\n url\n category {\n name\n slug\n }\n author {\n login\n }\n labels(first: 10) {\n nodes {\n name\n }\n }\n }\n }\n }\n }\n \")\n \n # Extract discussions and normalize structure\n echo \"$RESULT\" | jq -r '\n .data.repository.discussions.nodes \n | map({\n number, \n title,\n body,\n createdAt, \n updatedAt,\n url,\n category: .category.name,\n categorySlug: .category.slug,\n author: (if .author then .author.login else \"unknown\" end),\n labels: [.labels.nodes[].name],\n isAgenticWorkflow: (if .body then (.body | test(\"^> AI generated by\"; \"m\")) else false end)\n })\n ' | jq -s 'add' > /tmp/gh-aw/temp_discussions.json\n \n # Merge with existing discussions\n jq -s 'add | unique_by(.number)' \"$DISCUSSIONS_FILE\" /tmp/gh-aw/temp_discussions.json > /tmp/gh-aw/merged.json\n mv /tmp/gh-aw/merged.json \"$DISCUSSIONS_FILE\"\n rm -f /tmp/gh-aw/temp_discussions.json\n \n # Check if there are more pages\n HAS_NEXT_PAGE=$(echo \"$RESULT\" | jq -r '.data.repository.discussions.pageInfo.hasNextPage')\n CURSOR=$(echo \"$RESULT\" | jq -r '.data.repository.discussions.pageInfo.endCursor')\n \n # Check if we've reached the requested count\n CURRENT_COUNT=$(jq 'length' \"$DISCUSSIONS_FILE\")\n MAX_COUNT=\"${GH_AW_DISCUSSIONS_COUNT:-100}\"\n if [ \"$CURRENT_COUNT\" -ge \"$MAX_COUNT\" ]; then\n echo \"Reached requested discussion count ($MAX_COUNT)\"\n # Trim to exact count if we have more\n jq --argjson max \"$MAX_COUNT\" '.[:$max]' \"$DISCUSSIONS_FILE\" > /tmp/gh-aw/trimmed.json\n mv /tmp/gh-aw/trimmed.json \"$DISCUSSIONS_FILE\"\n break\n fi\n \n # Safety check - break after 10 pages (1000 discussions max regardless of count)\n PAGE_COUNT=$((PAGE_COUNT + 1))\n if [ $PAGE_COUNT -ge 10 ]; then\n echo \"Reached pagination limit (10 pages)\"\n break\n fi\n done\n \n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/discussions-data/discussions.json > /tmp/gh-aw/discussions-data/discussions-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/discussions-data/discussions.json \"$CACHE_DIR/discussions-${TODAY}.json\"\n cp /tmp/gh-aw/discussions-data/discussions-schema.json \"$CACHE_DIR/discussions-${TODAY}-schema.json\"\n\n echo \"✓ Discussions data saved to cache: discussions-${TODAY}.json\"\n echo \"Total discussions found: $(jq 'length' /tmp/gh-aw/discussions-data/discussions.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"Discussions data available at: /tmp/gh-aw/discussions-data/discussions.json\"\necho \"Schema available at: /tmp/gh-aw/discussions-data/discussions-schema.json\"" - env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/deep-report.md b/.github/workflows/deep-report.md index ecda4d29123..ab01844d76a 100644 --- a/.github/workflows/deep-report.md +++ b/.github/workflows/deep-report.md @@ -54,6 +54,7 @@ tools: imports: - shared/jqschema.md + - shared/discussions-data-fetch.md - shared/weekly-issues-data-fetch.md - shared/reporting.md --- @@ -83,7 +84,7 @@ Analyze recent discussions in this repository, focusing on: - **Report** discussions (category: reports) - Various agent analysis reports - **General** discussions - Other agent outputs -Use the GitHub MCP tools to list and read discussions from the past 7 days. +Pre-fetched discussions data is available at `/tmp/gh-aw/discussions-data/discussions.json` (populated by the discussions-data-fetch step). Use this file as the primary source for discussion analysis. ### Secondary: Workflow Logs @@ -155,13 +156,30 @@ jq '[.[].author.login] | unique' /tmp/gh-aw/weekly-issues-data/issues.json ### Step 1: Gather Discussion Intelligence -1. List all discussions from the past 7 days -2. For each discussion: +1. Load discussions from the pre-fetched data file at `/tmp/gh-aw/discussions-data/discussions.json` +2. Filter for discussions from the past 7 days using the `createdAt` or `updatedAt` fields +3. For each discussion: - Extract key metrics and findings - Identify the reporting agent (from tracker-id or title) - Note any warnings, alerts, or notable items - Record timestamps for trend analysis +**Example jq queries:** +```bash +# Get all discussions +jq 'length' /tmp/gh-aw/discussions-data/discussions.json + +# Get discussions from the past 7 days +DATE_7_DAYS_AGO=$(date -d '7 days ago' '+%Y-%m-%dT%H:%M:%SZ' 2>/dev/null || date -v-7d '+%Y-%m-%dT%H:%M:%SZ') +jq --arg date "$DATE_7_DAYS_AGO" '[.[] | select(.updatedAt >= $date)]' /tmp/gh-aw/discussions-data/discussions.json + +# Get discussions by category slug (e.g. "reports", "audits", "daily-news") +jq '[.[] | select(.categorySlug == "reports")]' /tmp/gh-aw/discussions-data/discussions.json + +# Get AI-generated discussions only +jq '[.[] | select(.isAgenticWorkflow == true)]' /tmp/gh-aw/discussions-data/discussions.json +``` + ### Step 2: Gather Workflow Intelligence Use the gh-aw `logs` tool to: diff --git a/.github/workflows/shared/discussions-data-fetch.md b/.github/workflows/shared/discussions-data-fetch.md index dbe11dbb0fc..0cdf2ddff4f 100644 --- a/.github/workflows/shared/discussions-data-fetch.md +++ b/.github/workflows/shared/discussions-data-fetch.md @@ -43,7 +43,7 @@ steps: else echo "⬇ Downloading fresh discussions data..." - # Fetch all OPEN discussions using GraphQL with pagination + # Fetch OPEN discussions using GraphQL with pagination (up to GH_AW_DISCUSSIONS_COUNT, default 100) DISCUSSIONS_FILE="/tmp/gh-aw/discussions-data/discussions.json" echo '[]' > "$DISCUSSIONS_FILE"