From 42151cf3685a4fb0b911aa7bf63ed0303330229e Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 13 Apr 2026 11:45:55 -0500 Subject: [PATCH 01/60] Add flaky test retry support for gtests and pytests Add retry logic for gtest binaries via GTEST_MAX_RETRIES (default 1) and pytest reruns via --reruns 2 --reruns-delay 5. Tests that fail then pass on retry are classified as flaky rather than failures. Add pytest-rerunfailures as a test dependency. --- ci/run_ctests.sh | 30 +++++++++++++++++++++++++++--- ci/run_cuopt_pytests.sh | 2 +- ci/run_cuopt_server_pytests.sh | 2 +- dependencies.yaml | 1 + 4 files changed, 30 insertions(+), 5 deletions(-) diff --git a/ci/run_ctests.sh b/ci/run_ctests.sh index fc1de8e1b4..f1d57519b1 100755 --- a/ci/run_ctests.sh +++ b/ci/run_ctests.sh @@ -21,16 +21,40 @@ else exit 1 fi -for gt in "${GTEST_DIR}"/*_TEST; do +GTEST_MAX_RETRIES=${GTEST_MAX_RETRIES:-1} + +run_gtest_with_retry() { + local gt="$1" + shift + local test_name test_name=$(basename "${gt}") + echo "Running gtest ${test_name}" - "${gt}" "$@" + if "${gt}" "$@"; then + return 0 + fi + + local attempt + for attempt in $(seq 1 "${GTEST_MAX_RETRIES}"); do + echo "WARNING: ${test_name} failed, retry ${attempt}/${GTEST_MAX_RETRIES}" + if "${gt}" "$@"; then + echo "FLAKY: ${test_name} passed on retry ${attempt}" + return 0 + fi + done + + echo "FAILED: ${test_name} failed after $((GTEST_MAX_RETRIES + 1)) attempts" + return 1 +} + +for gt in "${GTEST_DIR}"/*_TEST; do + run_gtest_with_retry "${gt}" "$@" done # Run C_API_TEST with CPU memory for local solves (excluding time limit tests) if [ -x "${GTEST_DIR}/C_API_TEST" ]; then echo "Running gtest C_API_TEST with CUOPT_USE_CPU_MEM_FOR_LOCAL" - CUOPT_USE_CPU_MEM_FOR_LOCAL=1 "${GTEST_DIR}/C_API_TEST" --gtest_filter=-c_api/TimeLimitTestFixture.* "$@" + CUOPT_USE_CPU_MEM_FOR_LOCAL=1 run_gtest_with_retry "${GTEST_DIR}/C_API_TEST" --gtest_filter=-c_api/TimeLimitTestFixture.* "$@" else echo "Skipping C_API_TEST with 
CUOPT_USE_CPU_MEM_FOR_LOCAL (binary not found)" fi diff --git a/ci/run_cuopt_pytests.sh b/ci/run_cuopt_pytests.sh index 66e996715a..080fa42a1b 100755 --- a/ci/run_cuopt_pytests.sh +++ b/ci/run_cuopt_pytests.sh @@ -9,4 +9,4 @@ set -euo pipefail # Support invoking run_cuopt_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cuopt/cuopt/ -pytest -s --cache-clear "$@" tests +pytest -s --cache-clear --reruns 2 --reruns-delay 5 "$@" tests diff --git a/ci/run_cuopt_server_pytests.sh b/ci/run_cuopt_server_pytests.sh index 4cb361a473..75d87d255d 100755 --- a/ci/run_cuopt_server_pytests.sh +++ b/ci/run_cuopt_server_pytests.sh @@ -9,4 +9,4 @@ set -euo pipefail # Support invoking run_cuopt_server_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cuopt_server/cuopt_server/ -pytest -s --cache-clear "$@" tests +pytest -s --cache-clear --reruns 2 --reruns-delay 5 "$@" tests diff --git a/dependencies.yaml b/dependencies.yaml index 057fc2a318..18d479a99f 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -340,6 +340,7 @@ dependencies: packages: - pytest<9.0 - pytest-cov + - pytest-rerunfailures test_python_cuopt: common: - output_types: [conda] From 1fc5cef01faeb128d70e01dc7459e8913804b6d0 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 13 Apr 2026 11:46:03 -0500 Subject: [PATCH 02/60] Add nightly report generator and shared helpers Add matrix-aware nightly test report generator that parses JUnit XML, classifies failures as new/recurring/flaky/stabilized, maintains per-matrix failure history on S3, and outputs Markdown, HTML, and JSON reports. Extract S3 helpers into shared module and shell helper to eliminate duplication across test scripts. 
--- ci/utils/nightly_report.py | 849 ++++++++++++++++++++++++++++++ ci/utils/nightly_report_helper.sh | 93 ++++ ci/utils/s3_helpers.py | 87 +++ 3 files changed, 1029 insertions(+) create mode 100755 ci/utils/nightly_report.py create mode 100755 ci/utils/nightly_report_helper.sh create mode 100644 ci/utils/s3_helpers.py diff --git a/ci/utils/nightly_report.py b/ci/utils/nightly_report.py new file mode 100755 index 0000000000..40e2e65798 --- /dev/null +++ b/ci/utils/nightly_report.py @@ -0,0 +1,849 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Nightly test report generator for cuOpt CI. + +Parses JUnit XML test results, classifies failures as flaky vs genuine, +maintains a failure history database on S3, and outputs: + - HTML report (detailed, uploaded to S3 and linked from Slack) + - Markdown summary (for $GITHUB_STEP_SUMMARY or terminal) + - JSON summary (for downstream consumers like Slack notifier and dashboard) + +Each CI matrix job (CUDA version x Python version x architecture) runs this +script independently. The --test-type and --matrix-label flags identify the +job so that history and summaries are stored per-matrix-combo. + +History lifecycle: + 1. Download history from S3 (falls back to empty if not found) + 2. Classify this run's results + 3. Update history: mark new failures, bump recurring counts, resolve stabilized tests + 4. Upload updated history back to S3 + 5. Generate reports (HTML, Markdown, JSON, GitHub Step Summary) + 6. 
Upload per-run JSON snapshot to S3 summaries dir (for aggregation) + +Usage: + python ci/utils/nightly_report.py \\ + --results-dir test-results/ \\ + --output-dir report-output/ \\ + --sha abc123 \\ + --test-type python \\ + --matrix-label cuda12.9-py3.12-x86_64 \\ + --s3-history-uri s3://bucket/ci_test_reports/nightly/history/python-main-cuda12.9-py3.12-x86_64.json \\ + --s3-summary-uri s3://bucket/ci_test_reports/nightly/summaries/2026-04-13/python-cuda12.9-py3.12-x86_64.json +""" + +import argparse +import json +import os +import sys +from collections import defaultdict +from datetime import datetime, timezone +from pathlib import Path +from xml.etree import ElementTree + +# Ensure ci/utils is importable when invoked as a script +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from s3_helpers import s3_download, s3_upload # noqa: E402 + +EMPTY_HISTORY = {"_schema_version": 1, "tests": {}} + + +# --------------------------------------------------------------------------- +# JUnit XML parsing +# --------------------------------------------------------------------------- + +def parse_junit_xml(xml_path): + """Parse a JUnit XML file and return a list of test result dicts.""" + results = [] + try: + tree = ElementTree.parse(xml_path) + except ElementTree.ParseError as e: + print(f"WARNING: Failed to parse {xml_path}: {e}", file=sys.stderr) + return results + + root = tree.getroot() + + if root.tag == "testsuites": + suites = root.findall("testsuite") + elif root.tag == "testsuite": + suites = [root] + else: + return results + + for suite in suites: + suite_name = suite.get("name", os.path.basename(xml_path)) + for testcase in suite.findall("testcase"): + name = testcase.get("name", "unknown") + classname = testcase.get("classname", "") + time_taken = testcase.get("time", "0") + + failure = testcase.find("failure") + error = testcase.find("error") + skipped = testcase.find("skipped") + + if skipped is not None: + status = "skipped" + message = 
skipped.get("message", "") + elif failure is not None: + status = "failed" + message = failure.get("message", "") + if failure.text: + message = failure.text[:500] + elif error is not None: + status = "error" + message = error.get("message", "") + if error.text: + message = error.text[:500] + else: + status = "passed" + message = "" + + results.append({ + "suite": suite_name, + "classname": classname, + "name": name, + "status": status, + "time": time_taken, + "message": message, + "source_file": str(xml_path), + }) + + return results + + +def collect_all_results(results_dir): + """Collect test results from all JUnit XML files in a directory.""" + results_dir = Path(results_dir) + all_results = [] + for xml_file in sorted(results_dir.rglob("*.xml")): + all_results.extend(parse_junit_xml(xml_file)) + return all_results + + +# --------------------------------------------------------------------------- +# Classification +# --------------------------------------------------------------------------- + +def classify_failures(results): + """ + Classify test results into passed, failed, flaky, skipped, and error. + + pytest-rerunfailures records reruns as additional entries. + A test that failed then passed on rerun is flaky. 
+ """ + test_groups = defaultdict(list) + for r in results: + key = f"{r['suite']}::{r['classname']}::{r['name']}" + test_groups[key].append(r) + + classified = { + "passed": [], + "failed": [], + "flaky": [], + "skipped": [], + "error": [], + } + + for key, entries in test_groups.items(): + statuses = [e["status"] for e in entries] + + if all(s == "skipped" for s in statuses): + classified["skipped"].append(entries[0]) + elif any(s == "passed" for s in statuses): + if any(s in ("failed", "error") for s in statuses): + entry = entries[-1].copy() + entry["status"] = "flaky" + entry["retry_count"] = sum( + 1 for s in statuses if s in ("failed", "error") + ) + classified["flaky"].append(entry) + else: + classified["passed"].append(entries[-1]) + elif any(s == "error" for s in statuses): + classified["error"].append(entries[-1]) + else: + classified["failed"].append(entries[-1]) + + return classified + + +# --------------------------------------------------------------------------- +# History management +# --------------------------------------------------------------------------- + +def load_history(history_path): + """Load failure history from a local JSON file.""" + try: + with open(history_path) as f: + data = json.load(f) + if "tests" in data: + return data + except (FileNotFoundError, json.JSONDecodeError): + pass + return dict(EMPTY_HISTORY) + + +def update_history(history, classified, sha, date_str): + """ + Update failure history with this run's results. + + Returns (history, new_failures, recurring_failures, resolved_tests). + resolved_tests = previously active failures that passed this run (stabilized). 
+ """ + tests = history.setdefault("tests", {}) + new_failures = [] + recurring_failures = [] + resolved_tests = [] + + # --- Genuine failures --- + for entry in classified["failed"] + classified["error"]: + test_key = f"{entry['suite']}::{entry['classname']}::{entry['name']}" + + if test_key in tests and tests[test_key]["status"] == "active": + tests[test_key]["last_seen_date"] = date_str + tests[test_key]["last_seen_sha"] = sha + tests[test_key]["failure_count"] += 1 + recurring_failures.append( + {**entry, "first_seen": tests[test_key]["first_seen_date"]} + ) + else: + tests[test_key] = { + "suite": entry["suite"], + "classname": entry["classname"], + "name": entry["name"], + "first_seen_date": date_str, + "first_seen_sha": sha, + "last_seen_date": date_str, + "last_seen_sha": sha, + "failure_count": 1, + "is_flaky": False, + "status": "active", + } + new_failures.append(entry) + + # --- Flaky tests --- + for entry in classified["flaky"]: + test_key = f"{entry['suite']}::{entry['classname']}::{entry['name']}" + if test_key in tests: + tests[test_key]["last_seen_date"] = date_str + tests[test_key]["last_seen_sha"] = sha + tests[test_key]["failure_count"] += 1 + tests[test_key]["is_flaky"] = True + else: + tests[test_key] = { + "suite": entry["suite"], + "classname": entry["classname"], + "name": entry["name"], + "first_seen_date": date_str, + "first_seen_sha": sha, + "last_seen_date": date_str, + "last_seen_sha": sha, + "failure_count": 1, + "is_flaky": True, + "status": "active", + } + + # --- Resolve stabilized tests --- + passed_keys = set() + for entry in classified["passed"]: + test_key = f"{entry['suite']}::{entry['classname']}::{entry['name']}" + passed_keys.add(test_key) + + for test_key in passed_keys: + if test_key in tests and tests[test_key]["status"] == "active": + rec = tests[test_key] + rec["status"] = "resolved" + rec["resolved_date"] = date_str + rec["resolved_sha"] = sha + resolved_tests.append({ + "suite": rec["suite"], + "classname": 
rec["classname"], + "name": rec["name"], + "first_seen": rec["first_seen_date"], + "failure_count": rec["failure_count"], + "was_flaky": rec.get("is_flaky", False), + }) + + return history, new_failures, recurring_failures, resolved_tests + + +def save_history(history, history_path): + """Write history to a local JSON file.""" + with open(history_path, "w") as f: + json.dump(history, f, indent=2, sort_keys=True) + f.write("\n") + + +# --------------------------------------------------------------------------- +# Report generation +# --------------------------------------------------------------------------- + +def generate_markdown_report( + classified, new_failures, recurring_failures, resolved_tests, history, + test_type="", matrix_label="", sha="", date_str="", +): + """Generate a Markdown summary report.""" + lines = [] + title = "# Nightly Test Report" + if test_type: + title += f" — {test_type}" + if matrix_label: + title += f" [{matrix_label}]" + lines.append(title) + lines.append("") + if date_str or sha: + meta_parts = [] + if date_str: + meta_parts.append(f"**Date:** {date_str}") + if sha: + meta_parts.append(f"**Commit:** `{sha[:12]}`") + if matrix_label: + meta_parts.append(f"**Matrix:** {matrix_label}") + lines.append(" | ".join(meta_parts)) + lines.append("") + + total_passed = len(classified["passed"]) + total_failed = len(classified["failed"]) + len(classified["error"]) + total_flaky = len(classified["flaky"]) + total_skipped = len(classified["skipped"]) + total = total_passed + total_failed + total_flaky + total_skipped + + lines.append("## Summary") + lines.append("") + lines.append("| Metric | Count |") + lines.append("|--------|-------|") + lines.append(f"| Total tests | {total} |") + lines.append(f"| Passed | {total_passed} |") + lines.append(f"| **Genuine failures** | **{total_failed}** |") + lines.append(f"| Flaky (passed on retry) | {total_flaky} |") + lines.append(f"| Skipped | {total_skipped} |") + if resolved_tests: + lines.append(f"| 
**Stabilized (were failing, now pass)** | **{len(resolved_tests)}** |") + lines.append("") + + # -- New genuine failures (highest priority) -- + if new_failures: + lines.append("## NEW Failures (not previously seen)") + lines.append("") + lines.append("| Suite | Test | Error |") + lines.append("|-------|------|-------|") + for entry in new_failures: + short_msg = ( + entry.get("message", "")[:80].replace("\n", " ").replace("|", "\\|") + ) + lines.append(f"| {entry['suite']} | `{entry['name']}` | {short_msg} |") + lines.append("") + + # -- Recurring failures -- + if recurring_failures: + lines.append("## Recurring Failures") + lines.append("") + lines.append("| Suite | Test | First seen | Failure count | Error |") + lines.append("|-------|------|------------|---------------|-------|") + for entry in recurring_failures: + short_msg = ( + entry.get("message", "")[:60].replace("\n", " ").replace("|", "\\|") + ) + first_seen = entry.get("first_seen", "unknown") + test_key = f"{entry['suite']}::{entry['classname']}::{entry['name']}" + count = history.get("tests", {}).get(test_key, {}).get("failure_count", "?") + lines.append( + f"| {entry['suite']} | `{entry['name']}` | {first_seen} | {count} | {short_msg} |" + ) + lines.append("") + + # -- Stabilized tests -- + if resolved_tests: + lines.append("## Stabilized Tests (were failing, now passing)") + lines.append("") + lines.append("| Suite | Test | Was failing since | Total failure count | Was flaky? 
|") + lines.append("|-------|------|-------------------|---------------------|------------|") + for entry in resolved_tests: + flaky_badge = "Yes" if entry.get("was_flaky") else "No" + lines.append( + f"| {entry['suite']} | `{entry['name']}` | {entry['first_seen']} " + f"| {entry['failure_count']} | {flaky_badge} |" + ) + lines.append("") + + # -- Flaky tests -- + if classified["flaky"]: + lines.append("## Flaky Tests (passed on retry)") + lines.append("") + lines.append("| Suite | Test | Retries needed |") + lines.append("|-------|------|----------------|") + for entry in classified["flaky"]: + retry_count = entry.get("retry_count", "?") + lines.append(f"| {entry['suite']} | `{entry['name']}` | {retry_count} |") + lines.append("") + + # -- Detailed errors -- + all_failures = classified["failed"] + classified["error"] + if all_failures: + lines.append("## All Failure Details") + lines.append("") + for entry in all_failures: + lines.append(f"### `{entry['classname']}::{entry['name']}`") + lines.append(f"- **Suite**: {entry['suite']}") + lines.append(f"- **Source**: {entry['source_file']}") + msg = entry.get("message", "").strip() + if msg: + lines.append("- **Error**:") + lines.append("```") + for line in msg.split("\n")[:20]: + lines.append(line) + lines.append("```") + lines.append("") + + if not all_failures and not classified["flaky"] and not resolved_tests: + lines.append("All tests passed! 
No failures or flaky tests detected.") + lines.append("") + + return "\n".join(lines) + + +def generate_json_summary( + classified, new_failures, recurring_failures, resolved_tests, + test_type="", matrix_label="", sha="", date_str="", +): + """Generate a JSON summary for downstream tools (Slack notifier, dashboard).""" + return { + "timestamp": datetime.now(timezone.utc).isoformat(), + "test_type": test_type, + "matrix_label": matrix_label, + "sha": sha, + "date": date_str, + "counts": { + "total": sum(len(v) for v in classified.values()), + "passed": len(classified["passed"]), + "failed": len(classified["failed"]) + len(classified["error"]), + "flaky": len(classified["flaky"]), + "skipped": len(classified["skipped"]), + "resolved": len(resolved_tests), + }, + "has_new_failures": len(new_failures) > 0, + "new_failures": [ + { + "suite": e["suite"], + "name": e["name"], + "classname": e["classname"], + "message": e.get("message", "")[:200], + } + for e in new_failures + ], + "recurring_failures": [ + { + "suite": e["suite"], + "name": e["name"], + "classname": e["classname"], + "first_seen": e.get("first_seen", "unknown"), + "message": e.get("message", "")[:200], + } + for e in recurring_failures + ], + "flaky_tests": [ + { + "suite": e["suite"], + "name": e["name"], + "classname": e["classname"], + "retry_count": e.get("retry_count", 0), + } + for e in classified["flaky"] + ], + "resolved_tests": [ + { + "suite": e["suite"], + "name": e["name"], + "classname": e["classname"], + "first_seen": e.get("first_seen", "unknown"), + "failure_count": e.get("failure_count", 0), + "was_flaky": e.get("was_flaky", False), + } + for e in resolved_tests + ], + } + + +# --------------------------------------------------------------------------- +# HTML report +# --------------------------------------------------------------------------- + +def _html_escape(text): + """Escape HTML special characters.""" + return ( + text.replace("&", "&") + .replace("<", "<") + .replace(">", ">") 
+ .replace('"', """) + ) + + +def generate_html_report( + classified, new_failures, recurring_failures, resolved_tests, history, + test_type="", matrix_label="", sha="", date_str="", +): + """Generate a self-contained HTML report with detailed failure info.""" + total_passed = len(classified["passed"]) + total_failed = len(classified["failed"]) + len(classified["error"]) + total_flaky = len(classified["flaky"]) + total_skipped = len(classified["skipped"]) + total = total_passed + total_failed + total_flaky + total_skipped + + title = "Nightly Test Report" + if test_type: + title += f" — {_html_escape(test_type)}" + if matrix_label: + title += f" [{_html_escape(matrix_label)}]" + + # Determine overall status color + if total_failed > 0: + status_color = "#d32f2f" + status_text = f"{total_failed} failure(s)" + elif total_flaky > 0: + status_color = "#f9a825" + status_text = "All passed (flaky detected)" + else: + status_color = "#388e3c" + status_text = "All passed" + + parts = [] + parts.append(f""" + + + + +{title} + + + +

{title}

+
""") + + meta_parts = [] + if date_str: + meta_parts.append(f"Date: {_html_escape(date_str)}") + if sha: + meta_parts.append(f"Commit: {_html_escape(sha[:12])}") + if matrix_label: + meta_parts.append(f"Matrix: {_html_escape(matrix_label)}") + parts.append("  |  ".join(meta_parts)) + + parts.append(f"""
+
{status_text}
+
+
{total}
Total
+
{total_passed}
Passed
+
{total_failed}
Failed
+
{total_flaky}
Flaky
+
Skipped
+
{len(resolved_tests)}
Stabilized
+
""") + + # --- New failures --- + if new_failures: + parts.append('

New Failures

') + parts.append('') + for e in new_failures: + msg = _html_escape(e.get("message", "")) + short = _html_escape(e.get("message", "")[:100]) + parts.append( + f'' + f'' + f'' + ) + parts.append("
SuiteTestError
{_html_escape(e["suite"])}{_html_escape(e["name"])} ' + f'NEW
{short}' + f'
{msg}
") + + # --- Recurring failures --- + if recurring_failures: + parts.append('

Recurring Failures

') + parts.append( + "" + "" + ) + for e in recurring_failures: + msg = _html_escape(e.get("message", "")) + short = _html_escape(e.get("message", "")[:100]) + first_seen = _html_escape(e.get("first_seen", "unknown")) + test_key = f"{e['suite']}::{e['classname']}::{e['name']}" + count = history.get("tests", {}).get(test_key, {}).get( + "failure_count", "?" + ) + parts.append( + f'' + f'' + f"" + f'' + ) + parts.append("
SuiteTestFirst SeenCountError
{_html_escape(e["suite"])}{_html_escape(e["name"])} ' + f'RECURRING{first_seen}{count}
{short}' + f'
{msg}
") + + # --- Stabilized --- + if resolved_tests: + parts.append('

Stabilized Tests

') + parts.append( + "" + "" + ) + for e in resolved_tests: + flaky_tag = "Yes" if e.get("was_flaky") else "No" + parts.append( + f'' + f'' + f'' + f'' + f"" + ) + parts.append("
SuiteTestFailing SinceFailure CountWas Flaky?
{_html_escape(e["suite"])}{_html_escape(e["name"])} ' + f'FIXED{_html_escape(e.get("first_seen", "?"))}{e.get("failure_count", "?")}{flaky_tag}
") + + # --- Flaky --- + if classified["flaky"]: + parts.append('

Flaky Tests (passed on retry)

') + parts.append("") + for e in classified["flaky"]: + parts.append( + f'' + f'' + f'' + ) + parts.append("
SuiteTestRetries
{_html_escape(e["suite"])}{_html_escape(e["name"])} ' + f'FLAKY{e.get("retry_count", "?")}
") + + # --- All failure details --- + all_failures = classified["failed"] + classified["error"] + if all_failures: + parts.append("

All Failure Details

") + for e in all_failures: + msg = _html_escape(e.get("message", "").strip()) + parts.append( + f'

' + f'{_html_escape(e["classname"])}::{_html_escape(e["name"])}

' + f'

' + f'Suite: {_html_escape(e["suite"])}  |  ' + f'Source: {_html_escape(e["source_file"])}

' + ) + if msg: + parts.append(f'
{msg}
') + parts.append("
") + + if not all_failures and not classified["flaky"] and not resolved_tests: + parts.append('

All tests passed! No failures or flaky tests detected.

') + + parts.append("") + return "\n".join(parts) + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + description="Generate nightly test failure report from JUnit XML results" + ) + parser.add_argument( + "--results-dir", required=True, + help="Directory containing JUnit XML test result files", + ) + parser.add_argument( + "--output-dir", default="report-output", + help="Directory to write report files to", + ) + parser.add_argument( + "--sha", default=os.environ.get("GITHUB_SHA", "unknown"), + help="Git commit SHA for this run", + ) + parser.add_argument( + "--date", default=datetime.now(timezone.utc).strftime("%Y-%m-%d"), + help="Date for this run (YYYY-MM-DD)", + ) + parser.add_argument( + "--test-type", default="unknown", + help=( + "Test type identifier (e.g., cpp, python, wheel-python, " + "wheel-server, notebooks)" + ), + ) + parser.add_argument( + "--matrix-label", default="", + help=( + "Matrix combination label (e.g., cuda12.9-py3.12-x86_64). " + "Included in reports and JSON summary to identify the CI job." + ), + ) + parser.add_argument( + "--s3-history-uri", default="", + help=( + "S3 URI for persistent failure history JSON. " + "Downloaded before analysis, uploaded after update. " + "Example: s3://bucket/ci_test_reports/nightly/history/" + "python-main-cuda12.9-py3.12-x86_64.json" + ), + ) + parser.add_argument( + "--s3-summary-uri", default="", + help=( + "S3 URI to upload this run's JSON snapshot for aggregation. " + "Example: s3://bucket/ci_test_reports/nightly/summaries/" + "2026-04-13/python-cuda12.9-py3.12-x86_64.json" + ), + ) + parser.add_argument( + "--s3-html-uri", default="", + help=( + "S3 URI to upload the HTML report. 
" + "Example: s3://bucket/ci_test_reports/nightly/reports/" + "2026-04-13/python-cuda12.9-py3.12-x86_64.html" + ), + ) + parser.add_argument( + "--github-step-summary", + default=os.environ.get("GITHUB_STEP_SUMMARY", ""), + help="Path to write GitHub Actions step summary", + ) + + args = parser.parse_args() + + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + local_history_path = str(output_dir / "test_failure_history.json") + + # ---- Step 1: Download history from S3 ---- + if args.s3_history_uri: + s3_download(args.s3_history_uri, local_history_path) + + # ---- Step 2: Collect and classify results ---- + print(f"Collecting test results from {args.results_dir} ...") + results = collect_all_results(args.results_dir) + if not results: + print("WARNING: No test results found.", file=sys.stderr) + + print(f"Found {len(results)} test case entries across all XML files") + classified = classify_failures(results) + + print( + f"Classification: {len(classified['passed'])} passed, " + f"{len(classified['failed'])} failed, " + f"{len(classified['error'])} errors, " + f"{len(classified['flaky'])} flaky, " + f"{len(classified['skipped'])} skipped" + ) + + # ---- Step 3: Update history ---- + history = load_history(local_history_path) + history, new_failures, recurring_failures, resolved_tests = update_history( + history, classified, args.sha, args.date + ) + + if resolved_tests: + print(f"Stabilized: {len(resolved_tests)} previously-failing test(s) now pass") + + save_history(history, local_history_path) + print(f"Updated local history at {local_history_path}") + + # ---- Step 4: Upload history back to S3 ---- + if args.s3_history_uri: + s3_upload(local_history_path, args.s3_history_uri) + + # ---- Step 5: Generate reports ---- + report_kwargs = dict( + test_type=args.test_type, + matrix_label=args.matrix_label, + sha=args.sha, + date_str=args.date, + ) + + md_report = generate_markdown_report( + classified, new_failures, recurring_failures, 
resolved_tests, history, + **report_kwargs, + ) + md_path = output_dir / "nightly_report.md" + md_path.write_text(md_report) + print(f"Markdown report written to {md_path}") + + html_report = generate_html_report( + classified, new_failures, recurring_failures, resolved_tests, history, + **report_kwargs, + ) + html_path = output_dir / "nightly_report.html" + html_path.write_text(html_report) + print(f"HTML report written to {html_path}") + + json_summary = generate_json_summary( + classified, new_failures, recurring_failures, resolved_tests, + **report_kwargs, + ) + json_path = output_dir / "nightly_summary.json" + json_path.write_text(json.dumps(json_summary, indent=2) + "\n") + print(f"JSON summary written to {json_path}") + + if args.github_step_summary: + with open(args.github_step_summary, "a") as f: + f.write(md_report) + print(f"Wrote GitHub Step Summary to {args.github_step_summary}") + + # ---- Step 6: Upload per-run snapshot and HTML to S3 ---- + if args.s3_summary_uri: + s3_upload(str(json_path), args.s3_summary_uri) + + if args.s3_html_uri: + s3_upload(str(html_path), args.s3_html_uri) + + # ---- Exit code ---- + genuine_failures = len(classified["failed"]) + len(classified["error"]) + if genuine_failures > 0: + print(f"\nFAILED: {genuine_failures} genuine test failure(s) detected.") + return 1 + if classified["flaky"]: + print(f"\nWARNING: All tests passed but {len(classified['flaky'])} flaky test(s) detected.") + else: + print("\nAll tests passed.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/ci/utils/nightly_report_helper.sh b/ci/utils/nightly_report_helper.sh new file mode 100755 index 0000000000..809b918df8 --- /dev/null +++ b/ci/utils/nightly_report_helper.sh @@ -0,0 +1,93 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Shared helper for generating nightly test reports with matrix-aware S3 paths. 
+# +# Usage (source from any test script): +# +# # For C++ tests (no Python version in matrix label): +# generate_nightly_report "cpp" +# +# # For Python tests (includes Python version in matrix label): +# generate_nightly_report "python" --with-python-version +# +# # For wheel tests: +# generate_nightly_report "wheel-python" --with-python-version +# +# Prerequisites (set before calling): +# RAPIDS_TESTS_DIR - directory containing JUnit XML test results +# +# Optional environment variables (auto-detected if not set): +# RAPIDS_CUDA_VERSION - CUDA version (e.g., "12.9") +# RAPIDS_PY_VERSION - Python version (e.g., "3.12"), used with --with-python-version +# RAPIDS_BRANCH - branch name (e.g., "main") +# CUOPT_DATASET_S3_URI - S3 base URI for reports +# GITHUB_SHA - commit SHA +# GITHUB_STEP_SUMMARY - path for GitHub Actions step summary + +# Resolve the directory where THIS helper lives (ci/utils/) +_HELPER_DIR="$(dirname "$(realpath "${BASH_SOURCE[0]}")")" + +generate_nightly_report() { + local test_type="${1:?Usage: generate_nightly_report [--with-python-version]}" + local include_py_version=false + + shift + while [ $# -gt 0 ]; do + case "$1" in + --with-python-version) include_py_version=true ;; + *) echo "WARNING: Unknown option: $1" >&2 ;; + esac + shift + done + + # --- Build matrix label --- + local cuda_tag="cuda${RAPIDS_CUDA_VERSION:-unknown}" + local arch_tag + arch_tag="$(arch)" + local matrix_label="${cuda_tag}-${arch_tag}" + + if [ "${include_py_version}" = true ]; then + local py_tag="py${RAPIDS_PY_VERSION:-unknown}" + matrix_label="${cuda_tag}-${py_tag}-${arch_tag}" + fi + + local branch_slug + branch_slug=$(echo "${RAPIDS_BRANCH:-main}" | tr '/' '-') + local run_date + run_date="$(date +%F)" + + # --- Ensure results dir exists --- + RAPIDS_TESTS_DIR="${RAPIDS_TESTS_DIR:-${PWD}/test-results}" + mkdir -p "${RAPIDS_TESTS_DIR}" + + local report_output_dir="${RAPIDS_TESTS_DIR}/report" + mkdir -p "${report_output_dir}" + + # --- Build S3 URIs --- + local 
s3_history_uri="" + local s3_summary_uri="" + local s3_html_uri="" + + if [ -n "${CUOPT_DATASET_S3_URI:-}" ]; then + local s3_base="${CUOPT_DATASET_S3_URI}ci_test_reports/nightly" + s3_history_uri="${s3_base}/history/${test_type}-${branch_slug}-${matrix_label}.json" + s3_summary_uri="${s3_base}/summaries/${run_date}/${test_type}-${matrix_label}.json" + s3_html_uri="${s3_base}/reports/${run_date}/${test_type}-${matrix_label}.html" + fi + + # --- Run nightly report --- + python3 "${_HELPER_DIR}/nightly_report.py" \ + --results-dir "${RAPIDS_TESTS_DIR}" \ + --output-dir "${report_output_dir}" \ + --sha "${GITHUB_SHA:-unknown}" \ + --date "${run_date}" \ + --test-type "${test_type}" \ + --matrix-label "${matrix_label}" \ + --s3-history-uri "${s3_history_uri}" \ + --s3-summary-uri "${s3_summary_uri}" \ + --s3-html-uri "${s3_html_uri}" \ + --github-step-summary "${GITHUB_STEP_SUMMARY:-}" \ + || true +} diff --git a/ci/utils/s3_helpers.py b/ci/utils/s3_helpers.py new file mode 100644 index 0000000000..f1f5795661 --- /dev/null +++ b/ci/utils/s3_helpers.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Shared S3 helper functions for cuOpt CI scripts. + +Maps CUOPT_AWS_* credentials to standard AWS env vars and provides +download / upload / list wrappers around the aws CLI. 
"""
Shared S3 helper functions for cuOpt CI scripts.

Maps CUOPT_AWS_* credentials to standard AWS env vars and provides
download / upload / list wrappers around the aws CLI.
"""

import os
import subprocess
import sys


def s3_env():
    """Return a copy of os.environ with CUOPT AWS credentials mapped to the
    standard AWS variables the aws CLI reads.

    Falls back to ``us-east-1`` when no region is configured anywhere.
    """
    env = os.environ.copy()
    if os.environ.get("CUOPT_AWS_ACCESS_KEY_ID"):
        env["AWS_ACCESS_KEY_ID"] = os.environ["CUOPT_AWS_ACCESS_KEY_ID"]
    if os.environ.get("CUOPT_AWS_SECRET_ACCESS_KEY"):
        env["AWS_SECRET_ACCESS_KEY"] = os.environ["CUOPT_AWS_SECRET_ACCESS_KEY"]
    if os.environ.get("CUOPT_AWS_REGION"):
        env["AWS_DEFAULT_REGION"] = os.environ["CUOPT_AWS_REGION"]
    elif "AWS_DEFAULT_REGION" not in env:
        env["AWS_DEFAULT_REGION"] = "us-east-1"
    return env


def s3_download(s3_uri, local_path):
    """Download a file from S3. Returns True on success, False on any error.

    A missing aws CLI or a failed transfer is reported as a warning, never
    raised: CI reporting is best-effort.
    """
    env = s3_env()
    try:
        subprocess.run(
            ["aws", "s3", "cp", s3_uri, local_path],
            env=env, check=True, capture_output=True, text=True,
        )
        print(f"Downloaded {s3_uri}")
        return True
    except FileNotFoundError:
        print("WARNING: aws CLI not found, skipping S3 download", file=sys.stderr)
        return False
    except subprocess.CalledProcessError as exc:
        # A download miss is expected on the very first run (no history yet).
        print(
            f"WARNING: S3 download failed (first run?): {exc.stderr.strip()}",
            file=sys.stderr,
        )
        return False


def s3_upload(local_path, s3_uri):
    """Upload a file to S3. Returns True on success, False on any error."""
    env = s3_env()
    try:
        subprocess.run(
            ["aws", "s3", "cp", local_path, s3_uri],
            env=env, check=True, capture_output=True, text=True,
        )
        print(f"Uploaded {local_path} to {s3_uri}")
        return True
    except FileNotFoundError:
        print("WARNING: aws CLI not found, skipping S3 upload", file=sys.stderr)
        return False
    except subprocess.CalledProcessError as exc:
        print(f"WARNING: S3 upload failed: {exc.stderr.strip()}", file=sys.stderr)
        return False


def s3_list(s3_prefix):
    """List object URIs directly under an S3 prefix.

    Returns a list of fully-qualified S3 URIs (``s3_prefix`` is expected to
    end with "/"). ``aws s3 ls`` reports sub-prefixes as "PRE <name>/" rows;
    those are skipped because callers expect downloadable objects, not
    directories. Returns [] when the aws CLI is missing or the listing fails.
    """
    env = s3_env()
    try:
        result = subprocess.run(
            ["aws", "s3", "ls", s3_prefix],
            env=env, check=True, capture_output=True, text=True,
        )
    except (FileNotFoundError, subprocess.CalledProcessError) as exc:
        print(f"WARNING: S3 ls failed: {exc}", file=sys.stderr)
        return []

    uris = []
    for line in result.stdout.strip().splitlines():
        parts = line.split()
        if not parts:
            continue
        # "PRE sub/" rows denote sub-prefixes, not objects — do not turn
        # them into fake object URIs.
        if parts[0] == "PRE":
            continue
        uris.append(f"{s3_prefix}{parts[-1]}")
    return uris
"notebooks" --with-python-version + rapids-logger "Notebook test script exiting with value: $EXITCODE" exit ${EXITCODE} diff --git a/ci/test_python.sh b/ci/test_python.sh index 4f91c83334..9af612ad76 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -77,5 +77,9 @@ timeout 20m ./ci/run_cuopt_server_pytests.sh \ rapids-logger "Test skills/ assets (Python, C, CLI)" timeout 10m ./ci/test_skills_assets.sh +rapids-logger "Generate nightly test report" +source "$(dirname "$(realpath "${BASH_SOURCE[0]}")")/utils/nightly_report_helper.sh" +generate_nightly_report "python" --with-python-version + rapids-logger "Test script exiting with value: $EXITCODE" exit ${EXITCODE} diff --git a/ci/test_wheel_cuopt.sh b/ci/test_wheel_cuopt.sh index a327082e83..5d002731b0 100755 --- a/ci/test_wheel_cuopt.sh +++ b/ci/test_wheel_cuopt.sh @@ -63,6 +63,13 @@ cd - RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" export RAPIDS_DATASET_ROOT_DIR +RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} +mkdir -p "${RAPIDS_TESTS_DIR}" + +EXITCODE=0 +trap "EXITCODE=1" ERR +set +e + # Run CLI tests timeout 10m bash ./python/libcuopt/libcuopt/tests/test_cli.sh @@ -71,7 +78,9 @@ timeout 10m bash ./python/libcuopt/libcuopt/tests/test_cli.sh # Due to race condition in certain cases UCX might not be able to cleanup properly, so we set the number of threads to 1 export OMP_NUM_THREADS=1 -timeout 30m ./ci/run_cuopt_pytests.sh --verbose --capture=no +timeout 30m ./ci/run_cuopt_pytests.sh \ + --junitxml="${RAPIDS_TESTS_DIR}/junit-wheel-cuopt.xml" \ + --verbose --capture=no # run thirdparty integration tests for only nightly builds if [[ "${RAPIDS_BUILD_TYPE}" == "nightly" ]]; then @@ -80,3 +89,9 @@ if [[ "${RAPIDS_BUILD_TYPE}" == "nightly" ]]; then ./ci/thirdparty-testing/run_pulp_tests.sh ./ci/thirdparty-testing/run_pyomo_tests.sh fi + +# Generate nightly test report +source "$(dirname "$(realpath "${BASH_SOURCE[0]}")")/utils/nightly_report_helper.sh" +generate_nightly_report "wheel-python" 
--with-python-version + +exit ${EXITCODE} diff --git a/ci/test_wheel_cuopt_server.sh b/ci/test_wheel_cuopt_server.sh index a76969b965..55852a913c 100755 --- a/ci/test_wheel_cuopt_server.sh +++ b/ci/test_wheel_cuopt_server.sh @@ -39,7 +39,22 @@ rapids-pip-retry install \ RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" export RAPIDS_DATASET_ROOT_DIR -timeout 30m ./ci/run_cuopt_server_pytests.sh --verbose --capture=no +RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} +mkdir -p "${RAPIDS_TESTS_DIR}" + +EXITCODE=0 +trap "EXITCODE=1" ERR +set +e + +timeout 30m ./ci/run_cuopt_server_pytests.sh \ + --junitxml="${RAPIDS_TESTS_DIR}/junit-wheel-cuopt-server.xml" \ + --verbose --capture=no # Run documentation tests ./ci/test_doc_examples.sh + +# Generate nightly test report +source "$(dirname "$(realpath "${BASH_SOURCE[0]}")")/utils/nightly_report_helper.sh" +generate_nightly_report "wheel-server" --with-python-version + +exit ${EXITCODE} From f6b0f75555c5e149ddb1f3edc84a3a37612da1e9 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 13 Apr 2026 11:46:23 -0500 Subject: [PATCH 04/60] Add aggregation, Slack notifications, and test dashboard Add aggregate_nightly.py to merge per-matrix JSON summaries into a consolidated report with matrix grid. Add Slack notifiers for both per-job and consolidated messages with HTML file upload support. Add nightly_summary.sh wrapper for the post-test aggregation job. Add static HTML dashboard with matrix overview, failure drill-down, and trend charts reading from S3 index.json. 
--- ci/dashboard/index.html | 652 ++++++++++++++++++++++++++ ci/nightly_summary.sh | 71 +++ ci/utils/aggregate_nightly.py | 605 ++++++++++++++++++++++++ ci/utils/send_consolidated_summary.sh | 287 ++++++++++++ ci/utils/send_nightly_summary.sh | 172 +++++++ 5 files changed, 1787 insertions(+) create mode 100644 ci/dashboard/index.html create mode 100755 ci/nightly_summary.sh create mode 100644 ci/utils/aggregate_nightly.py create mode 100755 ci/utils/send_consolidated_summary.sh create mode 100755 ci/utils/send_nightly_summary.sh diff --git a/ci/dashboard/index.html b/ci/dashboard/index.html new file mode 100644 index 0000000000..9b56a7c915 --- /dev/null +++ b/ci/dashboard/index.html @@ -0,0 +1,652 @@ + + + + + +cuOpt Nightly Test Dashboard + + + + + + + + + + + + +
+
Loading dashboard data...
+
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Aggregate all per-matrix nightly test summaries and send a single
# consolidated Slack notification. Runs as a post-test job after all
# matrix CI jobs finish.
#
# Required environment variables:
#   CUOPT_DATASET_S3_URI        - S3 base URI
#   CUOPT_AWS_ACCESS_KEY_ID     - AWS credentials
#   CUOPT_AWS_SECRET_ACCESS_KEY
#
# Optional:
#   CUOPT_SLACK_WEBHOOK_URL - sends Slack if set
#   RAPIDS_BRANCH           - branch name (default: main)
#   RAPIDS_BUILD_TYPE       - build type (nightly, pull-request, etc.)

set -euo pipefail

# Directory of this script; utils/ and dashboard/ are resolved relative to it.
SCRIPT_DIR="$(dirname "$(realpath "${BASH_SOURCE[0]}")")"
OUTPUT_DIR="${PWD}/aggregate-output"
mkdir -p "${OUTPUT_DIR}"

RUN_DATE="$(date +%F)"
BRANCH="${RAPIDS_BRANCH:-main}"

# Link back to the CI run that produced this aggregation.
GITHUB_RUN_URL="${GITHUB_SERVER_URL:-https://github.com}/${GITHUB_REPOSITORY:-NVIDIA/cuopt}/actions/runs/${GITHUB_RUN_ID:-}"

# Without a bucket there is nothing to aggregate — fail loudly.
if [ -z "${CUOPT_DATASET_S3_URI:-}" ]; then
  echo "ERROR: CUOPT_DATASET_S3_URI is not set. Cannot aggregate." >&2
  exit 1
fi

# S3 layout: history/, summaries/<date>/, reports/<date>/, index.json, dashboard/
S3_BASE="${CUOPT_DATASET_S3_URI}ci_test_reports/nightly"
S3_SUMMARIES_PREFIX="${S3_BASE}/summaries/${RUN_DATE}/"
S3_REPORTS_PREFIX="${S3_BASE}/reports/${RUN_DATE}/"
S3_CONSOLIDATED_JSON="${S3_BASE}/summaries/${RUN_DATE}/consolidated.json"
S3_CONSOLIDATED_HTML="${S3_BASE}/reports/${RUN_DATE}/consolidated.html"
S3_INDEX_URI="${S3_BASE}/index.json"
S3_DASHBOARD_URI="${S3_BASE}/dashboard/index.html"
DASHBOARD_DIR="${SCRIPT_DIR}/dashboard"

echo "Aggregating nightly summaries from ${S3_SUMMARIES_PREFIX}"

# Merge today's per-matrix JSON summaries, publish consolidated JSON/HTML,
# refresh index.json, and upload the static dashboard.
python3 "${SCRIPT_DIR}/utils/aggregate_nightly.py" \
  --s3-summaries-prefix "${S3_SUMMARIES_PREFIX}" \
  --s3-reports-prefix "${S3_REPORTS_PREFIX}" \
  --s3-output-uri "${S3_CONSOLIDATED_JSON}" \
  --s3-html-output-uri "${S3_CONSOLIDATED_HTML}" \
  --s3-index-uri "${S3_INDEX_URI}" \
  --s3-dashboard-uri "${S3_DASHBOARD_URI}" \
  --dashboard-dir "${DASHBOARD_DIR}" \
  --output-dir "${OUTPUT_DIR}" \
  --date "${RUN_DATE}" \
  --branch "${BRANCH}" \
  --github-run-url "${GITHUB_RUN_URL}"

# Send consolidated Slack notification if webhook is available and this is a nightly build
if [ -n "${CUOPT_SLACK_WEBHOOK_URL:-}" ] && [ "${RAPIDS_BUILD_TYPE:-}" = "nightly" ]; then
  echo "Sending consolidated Slack notification"
  CONSOLIDATED_SUMMARY="${OUTPUT_DIR}/consolidated_summary.json" \
  CONSOLIDATED_HTML="${OUTPUT_DIR}/consolidated_report.html" \
  SLACK_WEBHOOK_URL="${CUOPT_SLACK_WEBHOOK_URL}" \
  SLACK_BOT_TOKEN="${CUOPT_SLACK_BOT_TOKEN:-}" \
  SLACK_CHANNEL_ID="${CUOPT_SLACK_CHANNEL_ID:-}" \
  REPORT_URL="${S3_CONSOLIDATED_HTML}" \
  bash "${SCRIPT_DIR}/utils/send_consolidated_summary.sh"
fi

echo "Nightly summary complete."
#!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""
Aggregate per-matrix nightly test summaries into a single consolidated report.

Runs as a post-test job after all matrix CI jobs finish. It:
  1. Lists all JSON summaries uploaded to S3 for today's date
  2. Downloads and merges them
  3. Builds a matrix grid (test_type x matrix_label -> status)
  4. Generates a consolidated JSON, HTML report, and Slack payload
  5. Uploads the consolidated report to S3

Usage:
    python ci/utils/aggregate_nightly.py \\
        --s3-summaries-prefix s3://bucket/ci_test_reports/nightly/summaries/2026-04-13/ \\
        --s3-reports-prefix s3://bucket/ci_test_reports/nightly/reports/2026-04-13/ \\
        --output-dir /tmp/aggregate-output \\
        --date 2026-04-13 \\
        --branch main
"""

import argparse
import json
import os
import sys
from datetime import datetime, timezone
from pathlib import Path

# Ensure ci/utils is importable when invoked as a script
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from s3_helpers import s3_download, s3_upload, s3_list  # noqa: E402


# ---------------------------------------------------------------------------
# Download and merge summaries
# ---------------------------------------------------------------------------

def download_summaries(s3_prefix, local_dir):
    """Fetch every ``*.json`` summary under ``s3_prefix`` into ``local_dir``.

    Returns the list of successfully downloaded-and-parsed summary dicts;
    download and parse problems are warnings, not errors.
    """
    target = Path(local_dir)
    target.mkdir(parents=True, exist_ok=True)

    json_uris = [uri for uri in s3_list(s3_prefix) if uri.endswith(".json")]
    print(f"Found {len(json_uris)} summary file(s) at {s3_prefix}")

    loaded = []
    for uri in json_uris:
        dest = str(target / uri.rsplit("/", 1)[-1])
        if not s3_download(uri, dest):
            continue
        try:
            with open(dest) as fh:
                loaded.append(json.load(fh))
        except (json.JSONDecodeError, OSError) as exc:
            print(f"WARNING: Failed to parse {dest}: {exc}",
                  file=sys.stderr)
    return loaded


def load_local_summaries(local_dir):
    """Load summaries from a local directory (for testing without S3)."""
    loaded = []
    for json_file in sorted(Path(local_dir).glob("*.json")):
        try:
            with open(json_file) as fh:
                loaded.append(json.load(fh))
        except (json.JSONDecodeError, OSError) as exc:
            print(f"WARNING: Failed to parse {json_file}: {exc}",
                  file=sys.stderr)
    return loaded


# ---------------------------------------------------------------------------
# Aggregation
# ---------------------------------------------------------------------------

def aggregate_summaries(summaries):
    """Merge per-matrix summaries into a consolidated view.

    Returns a dict with:
      - matrix_grid: list of {test_type, matrix_label, status, counts, sha}
      - totals: aggregate counts across all matrices
      - all_new_failures, all_recurring_failures, all_flaky_tests,
        all_resolved_tests: merged lists with matrix context added
    """
    grid = []
    totals = {
        "total": 0, "passed": 0, "failed": 0,
        "flaky": 0, "skipped": 0, "resolved": 0,
    }
    merged = {
        "new_failures": [],
        "recurring_failures": [],
        "flaky_tests": [],
        "resolved_tests": [],
    }

    for summary in summaries:
        test_type = summary.get("test_type", "unknown")
        matrix_label = summary.get("matrix_label", "unknown")
        counts = summary.get("counts", {})

        # Job status priority: failures dominate, then flakiness, then
        # "no results at all", otherwise the job passed cleanly.
        if counts.get("failed", 0) > 0:
            status = (
                "failed-new"
                if summary.get("has_new_failures", False)
                else "failed-recurring"
            )
        elif counts.get("flaky", 0) > 0:
            status = "flaky"
        elif counts.get("total", 0) == 0:
            status = "no-results"
        else:
            status = "passed"

        grid.append({
            "test_type": test_type,
            "matrix_label": matrix_label,
            "status": status,
            "counts": counts,
            "sha": summary.get("sha", ""),
        })

        for key in totals:
            totals[key] += counts.get(key, 0)

        # Tag every failure entry with the matrix it came from so the merged
        # lists remain attributable after flattening.
        context = {"test_type": test_type, "matrix_label": matrix_label}
        for key, bucket in merged.items():
            bucket.extend(
                {**entry, **context} for entry in summary.get(key, [])
            )

    # Deterministic display order across runs.
    grid.sort(key=lambda cell: (cell["test_type"], cell["matrix_label"]))

    return {
        "matrix_grid": grid,
        "totals": totals,
        "all_new_failures": merged["new_failures"],
        "all_recurring_failures": merged["recurring_failures"],
        "all_flaky_tests": merged["flaky_tests"],
        "all_resolved_tests": merged["resolved_tests"],
    }
+# --------------------------------------------------------------------------- +# Consolidated JSON +# --------------------------------------------------------------------------- + +def generate_consolidated_json(agg, date_str, branch, github_run_url=""): + """Generate the consolidated JSON for Slack and dashboard.""" + total_jobs = len(agg["matrix_grid"]) + failed_jobs = sum( + 1 for g in agg["matrix_grid"] if g["status"].startswith("failed") + ) + flaky_jobs = sum( + 1 for g in agg["matrix_grid"] if g["status"] == "flaky" + ) + passed_jobs = sum( + 1 for g in agg["matrix_grid"] if g["status"] == "passed" + ) + + return { + "timestamp": datetime.now(timezone.utc).isoformat(), + "date": date_str, + "branch": branch, + "github_run_url": github_run_url, + "job_summary": { + "total": total_jobs, + "passed": passed_jobs, + "failed": failed_jobs, + "flaky": flaky_jobs, + }, + "test_totals": agg["totals"], + "has_new_failures": len(agg["all_new_failures"]) > 0, + "matrix_grid": agg["matrix_grid"], + "new_failures": agg["all_new_failures"], + "recurring_failures": agg["all_recurring_failures"], + "flaky_tests": agg["all_flaky_tests"], + "resolved_tests": agg["all_resolved_tests"], + } + + +# --------------------------------------------------------------------------- +# Consolidated HTML +# --------------------------------------------------------------------------- + +def _html_escape(text): + return ( + str(text).replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace('"', """) + ) + + +def _status_badge(status): + """Return an HTML badge for a matrix cell status.""" + colors = { + "passed": ("#388e3c", "PASS"), + "failed-new": ("#d32f2f", "NEW FAIL"), + "failed-recurring": ("#e65100", "RECURRING"), + "flaky": ("#f9a825", "FLAKY"), + "no-results": ("#757575", "NO DATA"), + } + bg, label = colors.get(status, ("#757575", status.upper())) + text_color = "#212121" if status == "flaky" else "#fff" + return ( + f'' + f'{label}' + ) + + +def 
def generate_consolidated_html(
    agg, date_str, branch, github_run_url="", s3_reports_prefix="",
):
    """Generate a consolidated HTML dashboard for all matrix combos.

    NOTE(review): the original markup was corrupted during extraction (tags
    stripped); this reconstruction preserves the reported data and section
    structure — confirm visual details against the intended design.

    Parameters
    ----------
    agg : dict
        Output of ``aggregate_summaries``.
    date_str, branch : str
        Run date and branch shown in the header.
    github_run_url : str
        Optional link back to the GitHub Actions run.
    s3_reports_prefix : str
        Optional S3 prefix used to link each grid row to its per-matrix
        HTML report.

    Returns the HTML document as a single string.
    """
    grid = agg["matrix_grid"]
    totals = agg["totals"]
    total_jobs = len(grid)
    failed_jobs = sum(1 for g in grid if g["status"].startswith("failed"))

    # Top status bar: red if any job failed, yellow if only flaky, else green.
    if failed_jobs > 0:
        bar_color = "#d32f2f"
        bar_text = f"{failed_jobs} of {total_jobs} matrix jobs have failures"
    elif any(g["status"] == "flaky" for g in grid):
        bar_color = "#f9a825"
        bar_text = "All jobs passed (flaky tests detected)"
    else:
        bar_color = "#388e3c"
        bar_text = f"All {total_jobs} matrix jobs passed"

    parts = [f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>cuOpt Nightly &mdash; {_html_escape(branch)} &mdash; {_html_escape(date_str)}</title>
<style>
body {{ font-family: -apple-system, "Segoe UI", Roboto, sans-serif; margin: 24px; color: #212121; }}
h1 {{ font-size: 22px; }}
h2 {{ font-size: 17px; margin-top: 28px; }}
table {{ border-collapse: collapse; margin: 10px 0; }}
th, td {{ border: 1px solid #ddd; padding: 6px 10px; font-size: 14px; text-align: left; }}
th {{ background: #f5f5f5; }}
pre {{ white-space: pre-wrap; margin: 4px 0; }}
.bar {{ padding: 10px 16px; border-radius: 4px; color: #fff; font-weight: 600; background: {bar_color}; }}
.stat {{ display: inline-block; margin-right: 22px; text-align: center; }}
.stat b {{ display: block; font-size: 20px; }}
</style>
</head>
<body>
<h1>cuOpt Nightly Tests &mdash; {_html_escape(branch)}</h1>
<p>Date: {_html_escape(date_str)}"""]

    if github_run_url:
        parts.append(
            f' &nbsp;|&nbsp; <a href="{_html_escape(github_run_url)}">'
            f"GitHub Actions Run</a>"
        )

    # Status bar plus the six headline counters.
    parts.append(f"""</p>
<div class="bar">{bar_text}</div>
<p>
<span class="stat"><b>{totals['total']}</b>Total Tests</span>
<span class="stat"><b>{totals['passed']}</b>Passed</span>
<span class="stat"><b>{totals['failed']}</b>Failed</span>
<span class="stat"><b>{totals['flaky']}</b>Flaky</span>
<span class="stat"><b>{totals['skipped']}</b>Skipped</span>
<span class="stat"><b>{totals['resolved']}</b>Stabilized</span>
</p>""")

    # --- Matrix grid ---
    parts.append("<h2>Matrix Overview</h2>")
    parts.append(
        "<table><tr><th>Test Type</th><th>Matrix</th><th>Status</th>"
        "<th>Passed</th><th>Failed</th><th>Flaky</th><th>Total</th>"
        "<th>Report</th></tr>"
    )
    for g in grid:
        counts = g["counts"]
        # Link to the per-matrix HTML report on S3 when a prefix is known.
        report_link = ""
        if s3_reports_prefix:
            report_filename = f'{g["test_type"]}-{g["matrix_label"]}.html'
            report_link = (
                f'<a href="{_html_escape(s3_reports_prefix + report_filename)}">'
                f"View</a>"
            )
        parts.append(
            f'<tr><td>{_html_escape(g["test_type"])}</td>'
            f'<td>{_html_escape(g["matrix_label"])}</td>'
            f'<td>{_status_badge(g["status"])}</td>'
            f'<td>{counts.get("passed", 0)}</td>'
            f'<td>{counts.get("failed", 0)}</td>'
            f'<td>{counts.get("flaky", 0)}</td>'
            f'<td>{counts.get("total", 0)}</td>'
            f"<td>{report_link}</td></tr>"
        )
    parts.append("</table>")

    # --- New failures ---
    if agg["all_new_failures"]:
        parts.append("<h2>New Failures</h2>")
        parts.append(
            "<table><tr><th>Test Type</th><th>Matrix</th><th>Suite</th>"
            "<th>Test</th><th>Error</th></tr>"
        )
        for e in agg["all_new_failures"]:
            # Collapsible error: 100-char summary, full message on expand.
            msg = _html_escape(e.get("message", ""))
            short = _html_escape(e.get("message", "")[:100])
            parts.append(
                f'<tr><td>{_html_escape(e["test_type"])}</td>'
                f'<td>{_html_escape(e["matrix_label"])}</td>'
                f'<td>{_html_escape(e["suite"])}</td>'
                f'<td>{_html_escape(e["name"])}</td>'
                f"<td><details><summary>{short}</summary>"
                f"<pre>{msg}</pre></details></td></tr>"
            )
        parts.append("</table>")

    # --- Recurring failures ---
    if agg["all_recurring_failures"]:
        parts.append("<h2>Recurring Failures</h2>")
        parts.append(
            "<table><tr><th>Test Type</th><th>Matrix</th><th>Suite</th>"
            "<th>Test</th><th>Since</th><th>Error</th></tr>"
        )
        for e in agg["all_recurring_failures"]:
            msg = _html_escape(e.get("message", ""))
            short = _html_escape(e.get("message", "")[:100])
            parts.append(
                f'<tr><td>{_html_escape(e["test_type"])}</td>'
                f'<td>{_html_escape(e["matrix_label"])}</td>'
                f'<td>{_html_escape(e["suite"])}</td>'
                f'<td>{_html_escape(e["name"])}</td>'
                f'<td>{_html_escape(e.get("first_seen", "?"))}</td>'
                f"<td><details><summary>{short}</summary>"
                f"<pre>{msg}</pre></details></td></tr>"
            )
        parts.append("</table>")

    # --- Stabilized (previously failing, now passing) ---
    if agg["all_resolved_tests"]:
        parts.append("<h2>Stabilized Tests</h2>")
        parts.append(
            "<table><tr><th>Test Type</th><th>Matrix</th><th>Suite</th>"
            "<th>Test</th><th>Failing Since</th><th>Count</th></tr>"
        )
        for e in agg["all_resolved_tests"]:
            parts.append(
                f'<tr><td>{_html_escape(e["test_type"])}</td>'
                f'<td>{_html_escape(e["matrix_label"])}</td>'
                f'<td>{_html_escape(e["suite"])}</td>'
                f'<td>{_html_escape(e["name"])}</td>'
                f'<td>{_html_escape(e.get("first_seen", "?"))}</td>'
                f'<td>{e.get("failure_count", "?")}</td></tr>'
            )
        parts.append("</table>")

    # --- Flaky (passed only after retries) ---
    if agg["all_flaky_tests"]:
        parts.append("<h2>Flaky Tests</h2>")
        parts.append(
            "<table><tr><th>Test Type</th><th>Matrix</th><th>Suite</th>"
            "<th>Test</th><th>Retries</th></tr>"
        )
        for e in agg["all_flaky_tests"]:
            parts.append(
                f'<tr><td>{_html_escape(e["test_type"])}</td>'
                f'<td>{_html_escape(e["matrix_label"])}</td>'
                f'<td>{_html_escape(e["suite"])}</td>'
                f'<td>{_html_escape(e["name"])}</td>'
                f'<td>{e.get("retry_count", "?")}</td></tr>'
            )
        parts.append("</table>")

    # Celebrate a fully clean run.
    if (
        not agg["all_new_failures"]
        and not agg["all_recurring_failures"]
        and not agg["all_flaky_tests"]
        and not agg["all_resolved_tests"]
    ):
        parts.append(
            "<p><b>All tests passed across all matrices!</b></p>"
        )

    parts.append("</body>\n</html>")
    return "\n".join(parts)


# ---------------------------------------------------------------------------
# Index management
# ---------------------------------------------------------------------------

MAX_INDEX_DAYS = 90  # Keep at most 90 days in the index


def update_index(s3_index_uri, date_str, consolidated, output_dir):
    """Download index.json, add today's entry, prune old entries, re-upload.

    The index is the dashboard's table of contents: one compact entry per
    date with just enough data for trend charts. Corrupt or missing remote
    indexes are silently replaced with a fresh one.
    """
    local_index = str(output_dir / "index.json")

    # Download existing index (or start fresh)
    index = {"_schema_version": 1, "dates": {}}
    if s3_download(s3_index_uri, local_index):
        try:
            with open(local_index) as f:
                loaded = json.load(f)
            if "dates" in loaded:
                index = loaded
        except (json.JSONDecodeError, OSError):
            # Unreadable remote index: fall through and rebuild from scratch.
            pass

    # Add today's entry (compact — just enough for the dashboard trends)
    index["dates"][date_str] = {
        "job_summary": consolidated.get("job_summary", {}),
        "test_totals": consolidated.get("test_totals", {}),
        "has_new_failures": consolidated.get("has_new_failures", False),
        "branch": consolidated.get("branch", ""),
        "github_run_url": consolidated.get("github_run_url", ""),
    }

    # Prune to the newest MAX_INDEX_DAYS entries (ISO dates sort naturally).
    dates_sorted = sorted(index["dates"].keys(), reverse=True)
    if len(dates_sorted) > MAX_INDEX_DAYS:
        for old_date in dates_sorted[MAX_INDEX_DAYS:]:
            del index["dates"][old_date]

    # Write and upload
    with open(local_index, "w") as f:
        json.dump(index, f, indent=2, sort_keys=True)
        f.write("\n")
    print(f"Updated index.json with {len(index['dates'])} date(s)")

    s3_upload(local_index, s3_index_uri)


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def main():
    """CLI entry point: collect, aggregate, render, and publish summaries.

    Returns a process exit code (0 on success, 1 on usage error).
    """
    parser = argparse.ArgumentParser(
        description="Aggregate per-matrix nightly test summaries"
    )
    parser.add_argument(
        "--s3-summaries-prefix", default="",
        help="S3 prefix for per-matrix JSON summaries "
             "(e.g., s3://bucket/.../summaries/2026-04-13/)",
    )
    parser.add_argument(
        "--s3-reports-prefix", default="",
        help="S3 prefix where per-matrix HTML reports live (for linking)",
    )
    parser.add_argument(
        "--s3-output-uri", default="",
        help="S3 URI to upload the consolidated JSON",
    )
    parser.add_argument(
        "--s3-html-output-uri", default="",
        help="S3 URI to upload the consolidated HTML report",
    )
    parser.add_argument(
        "--s3-index-uri", default="",
        help="S3 URI for the index.json that tracks all available dates "
             "(read + write)",
    )
    parser.add_argument(
        "--s3-dashboard-uri", default="",
        help="S3 URI to upload the dashboard HTML "
             "(e.g., s3://bucket/.../dashboard/index.html)",
    )
    parser.add_argument(
        "--dashboard-dir", default="",
        help="Local directory containing dashboard files to upload",
    )
    parser.add_argument(
        "--local-summaries-dir", default="",
        help="Local directory with JSON summaries "
             "(alternative to S3, for testing)",
    )
    parser.add_argument(
        "--output-dir", default="aggregate-output",
        help="Local directory to write output files",
    )
    parser.add_argument(
        "--date",
        default=datetime.now(timezone.utc).strftime("%Y-%m-%d"),
        help="Date for this run (YYYY-MM-DD)",
    )
    parser.add_argument("--branch", default="main", help="Branch name")
    parser.add_argument(
        "--github-run-url", default="", help="URL to the GitHub Actions run",
    )

    args = parser.parse_args()
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # ---- Step 1: Collect summaries (local dir takes precedence for tests) ----
    if args.local_summaries_dir:
        summaries = load_local_summaries(args.local_summaries_dir)
    elif args.s3_summaries_prefix:
        download_dir = output_dir / "downloaded_summaries"
        summaries = download_summaries(args.s3_summaries_prefix, download_dir)
    else:
        print("ERROR: Provide --s3-summaries-prefix or --local-summaries-dir",
              file=sys.stderr)
        return 1

    if not summaries:
        # An empty report is still published so the dashboard shows the gap.
        print("WARNING: No summaries found. Generating empty report.",
              file=sys.stderr)

    print(f"Loaded {len(summaries)} matrix summary file(s)")

    # ---- Step 2: Aggregate ----
    agg = aggregate_summaries(summaries)
    print(
        f"Matrix grid: {len(agg['matrix_grid'])} jobs — "
        f"{sum(1 for g in agg['matrix_grid'] if g['status'] == 'passed')} passed, "
        f"{sum(1 for g in agg['matrix_grid'] if g['status'].startswith('failed'))} failed, "
        f"{sum(1 for g in agg['matrix_grid'] if g['status'] == 'flaky')} flaky"
    )

    # ---- Step 3: Generate outputs ----
    consolidated = generate_consolidated_json(
        agg, args.date, args.branch, args.github_run_url,
    )

    json_path = output_dir / "consolidated_summary.json"
    json_path.write_text(json.dumps(consolidated, indent=2) + "\n")
    print(f"Consolidated JSON written to {json_path}")

    html_report = generate_consolidated_html(
        agg, args.date, args.branch, args.github_run_url,
        args.s3_reports_prefix,
    )
    html_path = output_dir / "consolidated_report.html"
    html_path.write_text(html_report)
    print(f"Consolidated HTML written to {html_path}")

    # ---- Step 4: Upload to S3 (each target is optional) ----
    if args.s3_output_uri:
        s3_upload(str(json_path), args.s3_output_uri)
    if args.s3_html_output_uri:
        s3_upload(str(html_path), args.s3_html_output_uri)

    # ---- Step 5: Update index.json ----
    if args.s3_index_uri:
        update_index(
            args.s3_index_uri, args.date, consolidated, output_dir,
        )

    # ---- Step 6: Upload dashboard ----
    if args.s3_dashboard_uri and args.dashboard_dir:
        dashboard_file = Path(args.dashboard_dir) / "index.html"
        if dashboard_file.exists():
            s3_upload(str(dashboard_file), args.s3_dashboard_uri)
        else:
            print(f"WARNING: Dashboard not found at {dashboard_file}",
                  file=sys.stderr)

    return 0


if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,287 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Send a single consolidated Slack notification for the entire nightly run. +# Reads the aggregated JSON produced by aggregate_nightly.py and sends a rich +# Slack message with: +# - Matrix grid overview (test_type x matrix → status) +# - Failure tables with :new: / :repeat: badges and matrix context +# - @channel on new genuine failures +# - Stabilized and flaky test summaries +# - Link to GitHub Actions run and consolidated HTML report +# +# Required environment variables: +# SLACK_WEBHOOK_URL - Slack incoming webhook URL +# CONSOLIDATED_SUMMARY - Path to consolidated_summary.json +# +# Optional environment variables: +# REPORT_URL - Link to the consolidated HTML report on S3 +# CONSOLIDATED_HTML - Path to consolidated HTML file to upload to Slack +# SLACK_BOT_TOKEN - Slack Bot Token (xoxb-*) for file uploads +# SLACK_CHANNEL_ID - Slack channel ID for file uploads (required with bot token) + +set -euo pipefail + +CONSOLIDATED_SUMMARY="${CONSOLIDATED_SUMMARY:?CONSOLIDATED_SUMMARY must point to consolidated_summary.json}" +SLACK_WEBHOOK_URL="${SLACK_WEBHOOK_URL:?SLACK_WEBHOOK_URL is required}" +REPORT_URL="${REPORT_URL:-}" +CONSOLIDATED_HTML="${CONSOLIDATED_HTML:-}" +SLACK_BOT_TOKEN="${SLACK_BOT_TOKEN:-}" +SLACK_CHANNEL_ID="${SLACK_CHANNEL_ID:-}" + +if [ ! 
-f "${CONSOLIDATED_SUMMARY}" ]; then + echo "ERROR: Summary file not found: ${CONSOLIDATED_SUMMARY}" >&2 + exit 1 +fi + +PAYLOAD=$(python3 - "${CONSOLIDATED_SUMMARY}" "${REPORT_URL}" <<'PYEOF' +import json, sys + +summary_path, report_url = sys.argv[1:3] + +with open(summary_path) as f: + d = json.load(f) + +branch = d.get("branch", "main") +date = d.get("date", "unknown") +github_run_url = d.get("github_run_url", "") +jobs = d.get("job_summary", {}) +totals = d.get("test_totals", {}) +grid = d.get("matrix_grid", []) +has_new = d.get("has_new_failures", False) + +total_jobs = jobs.get("total", 0) +failed_jobs = jobs.get("failed", 0) +flaky_jobs = jobs.get("flaky", 0) +passed_jobs = jobs.get("passed", 0) + +# --- Status line --- +if failed_jobs > 0 and has_new: + emoji = ":rotating_light:" + text = f"NEW test failures in {failed_jobs} matrix job(s)" + mention = " " +elif failed_jobs > 0: + emoji = ":x:" + text = f"Recurring failures in {failed_jobs} matrix job(s)" + mention = "" +elif flaky_jobs > 0: + emoji = ":large_yellow_circle:" + text = "All jobs passed but flaky tests detected" + mention = "" +else: + emoji = ":white_check_mark:" + text = f"All {total_jobs} matrix jobs passed" + mention = "" + +stats = ( + f":white_check_mark: {totals.get('passed', 0)} passed | " + f":x: {totals.get('failed', 0)} failed | " + f":warning: {totals.get('flaky', 0)} flaky | " + f":fast_forward: {totals.get('skipped', 0)} skipped | " + f"Total: {totals.get('total', 0)}" +) + +blocks = [] + +# Header +blocks.append({ + "type": "header", + "text": { + "type": "plain_text", + "text": f"cuOpt Nightly Tests \u2014 {branch} \u2014 {date}", + "emoji": True, + }, +}) + +# Status summary +blocks.append({ + "type": "section", + "text": { + "type": "mrkdwn", + "text": f"{mention}{emoji} *{text}*\n\n{stats}", + }, +}) + +blocks.append({"type": "divider"}) + +# --- Matrix grid (compact) --- +# Group by test_type for readability +test_types = {} +for g in grid: + tt = g["test_type"] + 
test_types.setdefault(tt, []).append(g) + +status_icons = { + "passed": ":white_check_mark:", + "failed-new": ":rotating_light:", + "failed-recurring": ":x:", + "flaky": ":warning:", + "no-results": ":grey_question:", +} + +grid_lines = [] +for tt, entries in sorted(test_types.items()): + cells = [] + for g in entries: + icon = status_icons.get(g["status"], ":grey_question:") + label = g["matrix_label"] + failed_count = g["counts"].get("failed", 0) + if failed_count > 0: + cells.append(f"{icon} `{label}` ({failed_count} failures)") + else: + cells.append(f"{icon} `{label}`") + grid_lines.append(f"*{tt}*\n" + "\n".join(f" {c}" for c in cells)) + +# Slack blocks have a 3000 char limit per text field; truncate if needed +grid_text = "\n".join(grid_lines) +if len(grid_text) > 2900: + # Summarize instead of full grid + grid_text = ( + f"*Matrix Summary:* {passed_jobs} passed, {failed_jobs} failed, " + f"{flaky_jobs} flaky out of {total_jobs} jobs\n" + f"_(Full matrix in report link below)_" + ) + +blocks.append({ + "type": "section", + "text": {"type": "mrkdwn", "text": grid_text}, +}) + +# --- New failures (max 10 to avoid hitting Slack limits) --- +new_failures = d.get("new_failures", []) +if new_failures: + blocks.append({"type": "divider"}) + lines = [] + for f_entry in new_failures[:10]: + msg = f_entry.get("message", "")[:50].replace("\n", " ") + matrix = f_entry.get("matrix_label", "") + lines.append( + f" :new: `{f_entry['name']}` ({f_entry['test_type']} / {matrix}) \u2014 {msg}" + ) + if len(new_failures) > 10: + lines.append(f" _...and {len(new_failures) - 10} more_") + blocks.append({ + "type": "section", + "text": {"type": "mrkdwn", "text": "*New Failures:*\n" + "\n".join(lines)}, + }) + +# --- Recurring failures (max 10) --- +recurring = d.get("recurring_failures", []) +if recurring: + blocks.append({"type": "divider"}) + lines = [] + for f_entry in recurring[:10]: + matrix = f_entry.get("matrix_label", "") + first = f_entry.get("first_seen", "?") + 
lines.append( + f" :repeat: `{f_entry['name']}` ({f_entry['test_type']} / {matrix}) \u2014 since {first}" + ) + if len(recurring) > 10: + lines.append(f" _...and {len(recurring) - 10} more_") + blocks.append({ + "type": "section", + "text": {"type": "mrkdwn", "text": "*Recurring Failures:*\n" + "\n".join(lines)}, + }) + +# --- Stabilized --- +resolved = d.get("resolved_tests", []) +if resolved: + lines = [] + for r in resolved[:5]: + matrix = r.get("matrix_label", "") + count = r.get("failure_count", "?") + lines.append( + f" :white_check_mark: `{r['name']}` ({r['test_type']} / {matrix}) \u2014 failed {count}x" + ) + if len(resolved) > 5: + lines.append(f" _...and {len(resolved) - 5} more_") + blocks.append({ + "type": "section", + "text": { + "type": "mrkdwn", + "text": "*Stabilized (were failing, now pass):*\n" + "\n".join(lines), + }, + }) + +# --- Flaky summary (count only to save space) --- +flaky = d.get("flaky_tests", []) +if flaky: + # Group by test name to show unique flaky tests + unique_flaky = {} + for f_entry in flaky: + key = f_entry["name"] + unique_flaky.setdefault(key, []).append(f_entry.get("matrix_label", "")) + lines = [] + for name, matrices in sorted(unique_flaky.items())[:5]: + matrix_str = ", ".join(matrices[:3]) + if len(matrices) > 3: + matrix_str += f" +{len(matrices)-3} more" + lines.append(f" :warning: `{name}` ({matrix_str})") + if len(unique_flaky) > 5: + lines.append(f" _...and {len(unique_flaky) - 5} more unique flaky tests_") + blocks.append({ + "type": "section", + "text": {"type": "mrkdwn", "text": "*Flaky Tests:*\n" + "\n".join(lines)}, + }) + +# --- Links --- +link_parts = [] +if github_run_url: + link_parts.append(f"<{github_run_url}|GitHub Actions>") +if report_url: + link_parts.append(f"<{report_url}|Full Report>") +if link_parts: + blocks.append({"type": "divider"}) + blocks.append({ + "type": "context", + "elements": [{"type": "mrkdwn", "text": " ".join(link_parts)}], + }) + +payload = { + "channel": 
"cuopt-regression-testing", + "username": "cuOpt Nightly Bot", + "icon_emoji": ":robot_face:", + "blocks": blocks, +} +print(json.dumps(payload)) +PYEOF +) + +echo "Sending consolidated Slack notification..." +curl -s -X POST \ + -H 'Content-type: application/json' \ + --data "${PAYLOAD}" \ + "${SLACK_WEBHOOK_URL}" + +echo "" +echo "Consolidated Slack notification sent." + +# Upload HTML report as a file to Slack (requires bot token) +if [ -n "${SLACK_BOT_TOKEN}" ] && [ -n "${SLACK_CHANNEL_ID}" ] && [ -n "${CONSOLIDATED_HTML}" ] && [ -f "${CONSOLIDATED_HTML}" ]; then + echo "Uploading HTML report to Slack..." + + # Read date and branch from the summary for the filename + REPORT_DATE=$(python3 -c "import json,sys; print(json.load(open(sys.argv[1])).get('date','report'))" "${CONSOLIDATED_SUMMARY}" 2>/dev/null || echo "report") + REPORT_BRANCH=$(python3 -c "import json,sys; print(json.load(open(sys.argv[1])).get('branch','main'))" "${CONSOLIDATED_SUMMARY}" 2>/dev/null || echo "main") + UPLOAD_FILENAME="cuopt-nightly-${REPORT_BRANCH}-${REPORT_DATE}.html" + + UPLOAD_RESPONSE=$(curl -s -X POST \ + -H "Authorization: Bearer ${SLACK_BOT_TOKEN}" \ + -F "channels=${SLACK_CHANNEL_ID}" \ + -F "file=@${CONSOLIDATED_HTML}" \ + -F "filename=${UPLOAD_FILENAME}" \ + -F "title=cuOpt Nightly Report — ${REPORT_BRANCH} — ${REPORT_DATE}" \ + -F "initial_comment=Full nightly test report attached. Download and open in a browser for interactive details." \ + "https://slack.com/api/files.upload") + + if echo "${UPLOAD_RESPONSE}" | python3 -c "import json,sys; sys.exit(0 if json.load(sys.stdin).get('ok') else 1)" 2>/dev/null; then + echo "HTML report uploaded to Slack." + else + echo "WARNING: Slack file upload failed. Response: ${UPLOAD_RESPONSE}" >&2 + fi +else + if [ -n "${SLACK_BOT_TOKEN}" ] && [ -z "${SLACK_CHANNEL_ID}" ]; then + echo "WARNING: SLACK_BOT_TOKEN set but SLACK_CHANNEL_ID missing, skipping file upload." 
>&2 + fi +fi diff --git a/ci/utils/send_nightly_summary.sh b/ci/utils/send_nightly_summary.sh new file mode 100755 index 0000000000..7c2d16519c --- /dev/null +++ b/ci/utils/send_nightly_summary.sh @@ -0,0 +1,172 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Enhanced Slack notification for nightly test results. +# Reads the JSON summary produced by nightly_report.py and sends a rich +# Slack message with: +# - Failure tables with :new: / :repeat: badges +# - @channel on new genuine failures +# - Stabilized tests (were failing, now passing) +# - Flaky test list +# +# Required environment variables: +# SLACK_WEBHOOK_URL - Slack incoming webhook URL (set from CUOPT_SLACK_WEBHOOK_URL in CI) +# NIGHTLY_SUMMARY - Path to nightly_summary.json from nightly_report.py +# +# Optional environment variables: +# GITHUB_RUN_URL - Link to the GitHub Actions run +# REPORT_URL - Link to the S3 HTML report +# CUOPT_BRANCH - Branch name (e.g. main, release/26.06) + +set -euo pipefail + +NIGHTLY_SUMMARY="${NIGHTLY_SUMMARY:?NIGHTLY_SUMMARY must point to nightly_summary.json}" +SLACK_WEBHOOK_URL="${SLACK_WEBHOOK_URL:?SLACK_WEBHOOK_URL is required}" +GITHUB_RUN_URL="${GITHUB_RUN_URL:-}" +REPORT_URL="${REPORT_URL:-}" +CUOPT_BRANCH="${CUOPT_BRANCH:-main}" + +if [ ! -f "${NIGHTLY_SUMMARY}" ]; then + echo "ERROR: Summary file not found: ${NIGHTLY_SUMMARY}" >&2 + exit 1 +fi + +# Build the entire Slack payload in Python for safe JSON handling. +# Shell variable interpolation into nested JSON is brittle; Python reads the +# summary file directly and produces a valid JSON payload on stdout. 
+PAYLOAD=$(python3 - "${NIGHTLY_SUMMARY}" "${CUOPT_BRANCH}" "${GITHUB_RUN_URL}" "${REPORT_URL}" <<'PYEOF' +import json, sys + +summary_path, branch, github_run_url, report_url = sys.argv[1:5] + +with open(summary_path) as f: + d = json.load(f) + +counts = d["counts"] +total = counts["total"] +passed = counts["passed"] +failed = counts["failed"] +flaky = counts["flaky"] +skipped = counts["skipped"] +resolved = counts.get("resolved", 0) +has_new = d["has_new_failures"] + +# --- Status line --- +if failed > 0: + if has_new: + emoji = ":rotating_light:" + text = "NEW test failures detected" + mention = " " + else: + emoji = ":x:" + text = "Recurring test failures" + mention = "" +elif flaky > 0: + emoji = ":large_yellow_circle:" + text = "All passed but flaky tests detected" + mention = "" +else: + emoji = ":white_check_mark:" + text = "All tests passed" + mention = "" + +stats = ( + f":white_check_mark: {passed} passed | :x: {failed} failed | " + f":warning: {flaky} flaky | :fast_forward: {skipped} skipped | Total: {total}" +) + +blocks = [] + +# Header +blocks.append({ + "type": "header", + "text": {"type": "plain_text", "text": f"cuOpt Nightly Tests \u2014 {branch}", "emoji": True}, +}) + +# Status summary +blocks.append({ + "type": "section", + "text": {"type": "mrkdwn", "text": f"{mention}{emoji} *{text}*\n\n{stats}"}, +}) + +blocks.append({"type": "divider"}) + +# --- Genuine failures --- +if failed > 0: + lines = [] + for f_entry in d.get("new_failures", []): + msg = f_entry.get("message", "")[:60].replace("\n", " ") + lines.append(f" :new: `{f_entry['name']}` ({f_entry['suite']}) \u2014 {msg}") + for f_entry in d.get("recurring_failures", []): + msg = f_entry.get("message", "")[:60].replace("\n", " ") + first = f_entry.get("first_seen", "?") + lines.append(f" :repeat: `{f_entry['name']}` ({f_entry['suite']}) \u2014 since {first}") + blocks.append({ + "type": "section", + "text": {"type": "mrkdwn", "text": "*Genuine Failures:*\n" + "\n".join(lines)}, + }) + +# 
--- Stabilized tests --- +resolved_list = d.get("resolved_tests", []) +if resolved_list: + lines = [] + for r in resolved_list: + since = r.get("first_seen", "?") + count = r.get("failure_count", "?") + flaky_tag = " (was flaky)" if r.get("was_flaky") else "" + lines.append( + f" :white_check_mark: `{r['name']}` ({r['suite']}) \u2014 " + f"failing since {since}, failed {count}x{flaky_tag}" + ) + blocks.append({ + "type": "section", + "text": { + "type": "mrkdwn", + "text": "*Stabilized (were failing, now pass):*\n" + "\n".join(lines), + }, + }) + +# --- Flaky tests --- +flaky_list = d.get("flaky_tests", []) +if flaky_list: + lines = [] + for f_entry in flaky_list: + retries = f_entry.get("retry_count", "?") + lines.append(f" :warning: `{f_entry['name']}` ({f_entry['suite']}) \u2014 {retries} retries") + blocks.append({ + "type": "section", + "text": {"type": "mrkdwn", "text": "*Flaky Tests (passed on retry):*\n" + "\n".join(lines)}, + }) + +# --- Links --- +link_parts = [] +if github_run_url: + link_parts.append(f"<{github_run_url}|GitHub Actions>") +if report_url: + link_parts.append(f"<{report_url}|Full Report>") +if link_parts: + blocks.append({"type": "divider"}) + blocks.append({ + "type": "context", + "elements": [{"type": "mrkdwn", "text": " ".join(link_parts)}], + }) + +payload = { + "channel": "cuopt-regression-testing", + "username": "cuOpt Nightly Bot", + "icon_emoji": ":robot_face:", + "blocks": blocks, +} +print(json.dumps(payload)) +PYEOF +) + +echo "Sending Slack notification..." +curl -s -X POST \ + -H 'Content-type: application/json' \ + --data "${PAYLOAD}" \ + "${SLACK_WEBHOOK_URL}" + +echo "" +echo "Slack notification sent." From 043180fb407b6b6ed6f9d976d8ab53eabf313295 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 13 Apr 2026 11:46:33 -0500 Subject: [PATCH 05/60] Add nightly-summary job and secrets to test workflow Pass Slack webhook secret to all test jobs. 
Add nightly-summary job that runs after all test jobs complete, aggregates results from S3, sends a consolidated Slack notification, and uploads the dashboard. Pass S3 and Slack secrets via container-options for the custom job. --- .github/workflows/test.yaml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index a8cc5f2943..4b52dbffe3 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -41,6 +41,8 @@ jobs: script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} + script-env-secret-4-key: CUOPT_SLACK_WEBHOOK_URL + script-env-secret-4-value: ${{ secrets.CUOPT_SLACK_WEBHOOK_URL }} conda-python-tests: uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@main with: @@ -57,6 +59,8 @@ jobs: script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} + script-env-secret-4-key: CUOPT_SLACK_WEBHOOK_URL + script-env-secret-4-value: ${{ secrets.CUOPT_SLACK_WEBHOOK_URL }} wheel-tests-cuopt: uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main with: @@ -72,6 +76,8 @@ jobs: script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} + script-env-secret-4-key: CUOPT_SLACK_WEBHOOK_URL + script-env-secret-4-value: ${{ secrets.CUOPT_SLACK_WEBHOOK_URL }} wheel-tests-cuopt-server: uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main with: @@ -87,6 +93,8 @@ jobs: script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ 
secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} + script-env-secret-4-key: CUOPT_SLACK_WEBHOOK_URL + script-env-secret-4-value: ${{ secrets.CUOPT_SLACK_WEBHOOK_URL }} conda-notebook-tests: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main @@ -99,3 +107,29 @@ jobs: arch: "amd64" container_image: "rapidsai/ci-conda:26.06-latest" script: ci/test_notebooks.sh + nightly-summary: + if: ${{ always() && inputs.build_type == 'nightly' }} + needs: + - conda-cpp-tests + - conda-python-tests + - wheel-tests-cuopt + - wheel-tests-cuopt-server + - conda-notebook-tests + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main + with: + build_type: ${{ inputs.build_type }} + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + node_type: "cpu4" + arch: "amd64" + container_image: "rapidsai/ci-conda:26.06-latest" + container-options: >- + -e CUOPT_DATASET_S3_URI=${{ secrets.CUOPT_DATASET_S3_URI }} + -e CUOPT_AWS_ACCESS_KEY_ID=${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} + -e CUOPT_AWS_SECRET_ACCESS_KEY=${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} + -e CUOPT_SLACK_WEBHOOK_URL=${{ secrets.CUOPT_SLACK_WEBHOOK_URL }} + -e CUOPT_SLACK_BOT_TOKEN=${{ secrets.CUOPT_SLACK_BOT_TOKEN }} + -e CUOPT_SLACK_CHANNEL_ID=${{ secrets.CUOPT_SLACK_CHANNEL_ID }} + script: ci/nightly_summary.sh From a226607c5fb2d61b9348984b838affdd062eb9ed Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 13 Apr 2026 11:47:19 -0500 Subject: [PATCH 06/60] Update developer skill with CI best practices Add pitfall entries for cross-cutting change discipline: full scope audits, code duplication, CI matrix parallelism, extensibility, and actionable reporting. 
--- skills/cuopt-developer/SKILL.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/skills/cuopt-developer/SKILL.md b/skills/cuopt-developer/SKILL.md index 99743f9171..98fe62f19c 100644 --- a/skills/cuopt-developer/SKILL.md +++ b/skills/cuopt-developer/SKILL.md @@ -295,6 +295,16 @@ rmm::device_uvector data(100, stream); | Missing `nvcc` | Set `$CUDACXX` or add CUDA to `$PATH` | | CUDA out of memory | Reduce problem size | | Slow debug library loading | Device symbols cause delay | + +| CI state doesn't persist between runs | CI containers are ephemeral. Never write persistent state to repo files from CI — use S3 (`CUOPT_DATASET_S3_URI`) or artifact stores. Ask: "After this container dies, does tomorrow's run see today's data?" | +| CI state transitions go unreported | When CI tracks state over time (e.g. test failures), every transition (new failure, recurring, stabilized) needs an explicit notification path. Ask: "When state X changes to Y, who learns about it and how?" | +| Designing CI features without lifecycle check | Before shipping any CI feature that tracks state: (1) Where does state live between runs? (2) What writes/reads it? (3) What happens on state transitions? Verify end-to-end, not just the happy-path logic. | +| Change applied to only some targets | Before implementing, audit the full scope of what needs the change. For CI: `ls ci/test*.sh`. For APIs: grep all callers. For patterns: find every instance. Enumerate ALL targets first, implement second. | +| Shared resource ignores CI matrix parallelism | CI matrices run jobs in parallel across CUDA x Python x arch. Any shared resource (S3 paths, files, databases) must be keyed by the full execution context. Ask: "What happens when N parallel jobs access this simultaneously?" | +| Same logic duplicated across files | When the same block (>10 lines) appears in 2+ places — any language, any context — extract a shared helper immediately. Don't duplicate first and refactor later. 
This applies to shell scripts, Python modules, C/C++ code equally. | +| Feature not extensible for new variants | After implementing, ask: "If someone adds a new variant (test type, matrix entry, endpoint, etc.), what do they change?" If the answer is more than a one-line addition, the design needs a shared helper or auto-discovery. Avoid hardcoded lists of known variants. | +| Reports generated without actionable detail | Reports and notifications must include enough context to act without digging: error messages, execution context (matrix, commit), history (new vs recurring), and links or attachments for full details. Provide downloadable artifacts when possible. | + ## Canonical Documentation From b6d13033a036ba335fff6e5c8fe0da47210df06b Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 13 Apr 2026 12:10:42 -0500 Subject: [PATCH 07/60] Fix pre-commit: ruff format, copyright years, dependency files Apply ruff formatting to Python files, update copyright years to 2026 in shell scripts, regenerate conda environment files and pyproject.toml from dependencies.yaml, and remove hardcoded version from comment. --- ci/test_cpp.sh | 2 +- ci/test_notebooks.sh | 2 +- ci/utils/aggregate_nightly.py | 199 ++++++++------- ci/utils/nightly_report.py | 226 ++++++++++++------ ci/utils/s3_helpers.py | 31 ++- ci/utils/send_nightly_summary.sh | 2 +- .../all_cuda-129_arch-aarch64.yaml | 1 + .../all_cuda-129_arch-x86_64.yaml | 1 + .../all_cuda-131_arch-aarch64.yaml | 1 + .../all_cuda-131_arch-x86_64.yaml | 1 + .../cuopt/linear_programming/pyproject.toml | 1 + python/cuopt/pyproject.toml | 1 + python/cuopt_self_hosted/pyproject.toml | 1 + python/cuopt_server/pyproject.toml | 1 + 14 files changed, 308 insertions(+), 162 deletions(-) diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index 4def832194..a68e0c7979 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -1,6 +1,6 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 set -euo pipefail diff --git a/ci/test_notebooks.sh b/ci/test_notebooks.sh index b58c9a1d32..0b2b339ba1 100755 --- a/ci/test_notebooks.sh +++ b/ci/test_notebooks.sh @@ -1,6 +1,6 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 set -euo pipefail diff --git a/ci/utils/aggregate_nightly.py b/ci/utils/aggregate_nightly.py index 56ade2796e..4517ab3c6a 100644 --- a/ci/utils/aggregate_nightly.py +++ b/ci/utils/aggregate_nightly.py @@ -37,6 +37,7 @@ # Download and merge summaries # --------------------------------------------------------------------------- + def download_summaries(s3_prefix, local_dir): """Download all JSON summaries from S3 prefix into local_dir. 
Returns list of loaded summary dicts.""" @@ -56,8 +57,10 @@ def download_summaries(s3_prefix, local_dir): with open(local_path) as f: summaries.append(json.load(f)) except (json.JSONDecodeError, OSError) as exc: - print(f"WARNING: Failed to parse {local_path}: {exc}", - file=sys.stderr) + print( + f"WARNING: Failed to parse {local_path}: {exc}", + file=sys.stderr, + ) return summaries @@ -70,8 +73,9 @@ def load_local_summaries(local_dir): with open(json_file) as f: summaries.append(json.load(f)) except (json.JSONDecodeError, OSError) as exc: - print(f"WARNING: Failed to parse {json_file}: {exc}", - file=sys.stderr) + print( + f"WARNING: Failed to parse {json_file}: {exc}", file=sys.stderr + ) return summaries @@ -79,6 +83,7 @@ def load_local_summaries(local_dir): # Aggregation # --------------------------------------------------------------------------- + def aggregate_summaries(summaries): """Merge per-matrix summaries into a consolidated view. @@ -90,8 +95,12 @@ def aggregate_summaries(summaries): """ grid = [] totals = { - "total": 0, "passed": 0, "failed": 0, - "flaky": 0, "skipped": 0, "resolved": 0, + "total": 0, + "passed": 0, + "failed": 0, + "flaky": 0, + "skipped": 0, + "resolved": 0, } all_new_failures = [] all_recurring_failures = [] @@ -117,13 +126,15 @@ def aggregate_summaries(summaries): else: status = "passed" - grid.append({ - "test_type": test_type, - "matrix_label": matrix_label, - "status": status, - "counts": counts, - "sha": s.get("sha", ""), - }) + grid.append( + { + "test_type": test_type, + "matrix_label": matrix_label, + "status": status, + "counts": counts, + "sha": s.get("sha", ""), + } + ) # Accumulate totals for key in totals: @@ -157,18 +168,15 @@ def aggregate_summaries(summaries): # Consolidated JSON # --------------------------------------------------------------------------- + def generate_consolidated_json(agg, date_str, branch, github_run_url=""): """Generate the consolidated JSON for Slack and dashboard.""" total_jobs = 
len(agg["matrix_grid"]) failed_jobs = sum( 1 for g in agg["matrix_grid"] if g["status"].startswith("failed") ) - flaky_jobs = sum( - 1 for g in agg["matrix_grid"] if g["status"] == "flaky" - ) - passed_jobs = sum( - 1 for g in agg["matrix_grid"] if g["status"] == "passed" - ) + flaky_jobs = sum(1 for g in agg["matrix_grid"] if g["status"] == "flaky") + passed_jobs = sum(1 for g in agg["matrix_grid"] if g["status"] == "passed") return { "timestamp": datetime.now(timezone.utc).isoformat(), @@ -195,9 +203,11 @@ def generate_consolidated_json(agg, date_str, branch, github_run_url=""): # Consolidated HTML # --------------------------------------------------------------------------- + def _html_escape(text): return ( - str(text).replace("&", "&") + str(text) + .replace("&", "&") .replace("<", "<") .replace(">", ">") .replace('"', """) @@ -218,12 +228,16 @@ def _status_badge(status): return ( f'' - f'{label}' + f"{label}" ) def generate_consolidated_html( - agg, date_str, branch, github_run_url="", s3_reports_prefix="", + agg, + date_str, + branch, + github_run_url="", + s3_reports_prefix="", ): """Generate a consolidated HTML dashboard for all matrix combos.""" total_jobs = len(agg["matrix_grid"]) @@ -295,18 +309,18 @@ def generate_consolidated_html( if github_run_url: parts.append( f'  |  ' - f'GitHub Actions Run' + f"GitHub Actions Run" ) parts.append(f"""
{bar_text}
-
{totals['total']}
Total Tests
-
{totals['passed']}
Passed
-
{totals['failed']}
Failed
-
{totals['flaky']}
Flaky
-
Skipped
-
{totals['resolved']}
Stabilized
+
{totals["total"]}
Total Tests
+
{totals["passed"]}
Passed
+
{totals["failed"]}
Failed
+
{totals["flaky"]}
Flaky
+
Skipped
+
{totals["resolved"]}
Stabilized
""") # --- Matrix grid --- @@ -320,21 +334,19 @@ def generate_consolidated_html( # Build link to per-matrix HTML report on S3 report_link = "" if s3_reports_prefix: - report_filename = ( - f'{g["test_type"]}-{g["matrix_label"]}.html' - ) + report_filename = f"{g['test_type']}-{g['matrix_label']}.html" report_link = ( f'View' ) parts.append( - f'{_html_escape(g["test_type"])}' - f'{_html_escape(g["matrix_label"])}' - f'{_status_badge(g["status"])}' - f'{counts.get("passed", 0)}' - f'{counts.get("failed", 0)}' - f'{counts.get("flaky", 0)}' - f'{counts.get("total", 0)}' + f"{_html_escape(g['test_type'])}" + f"{_html_escape(g['matrix_label'])}" + f"{_status_badge(g['status'])}" + f"{counts.get('passed', 0)}" + f"{counts.get('failed', 0)}" + f"{counts.get('flaky', 0)}" + f"{counts.get('total', 0)}" f"{report_link}" ) parts.append("") @@ -350,11 +362,11 @@ def generate_consolidated_html( msg = _html_escape(e.get("message", "")) short = _html_escape(e.get("message", "")[:100]) parts.append( - f'{_html_escape(e["test_type"])}' - f'{_html_escape(e["matrix_label"])}' - f'{_html_escape(e["suite"])}' - f'{_html_escape(e["name"])}' - f'
{short}' + f"{_html_escape(e['test_type'])}" + f"{_html_escape(e['matrix_label'])}" + f"{_html_escape(e['suite'])}" + f"{_html_escape(e['name'])}" + f"
{short}" f'
{msg}
' ) parts.append("") @@ -370,12 +382,12 @@ def generate_consolidated_html( msg = _html_escape(e.get("message", "")) short = _html_escape(e.get("message", "")[:100]) parts.append( - f'{_html_escape(e["test_type"])}' - f'{_html_escape(e["matrix_label"])}' - f'{_html_escape(e["suite"])}' - f'{_html_escape(e["name"])}' - f'{_html_escape(e.get("first_seen", "?"))}' - f'
{short}' + f"{_html_escape(e['test_type'])}" + f"{_html_escape(e['matrix_label'])}" + f"{_html_escape(e['suite'])}" + f"{_html_escape(e['name'])}" + f"{_html_escape(e.get('first_seen', '?'))}" + f"
{short}" f'
{msg}
' ) parts.append("") @@ -389,12 +401,12 @@ def generate_consolidated_html( ) for e in agg["all_resolved_tests"]: parts.append( - f'{_html_escape(e["test_type"])}' - f'{_html_escape(e["matrix_label"])}' - f'{_html_escape(e["suite"])}' - f'{_html_escape(e["name"])}' - f'{_html_escape(e.get("first_seen", "?"))}' - f'{e.get("failure_count", "?")}' + f"{_html_escape(e['test_type'])}" + f"{_html_escape(e['matrix_label'])}" + f"{_html_escape(e['suite'])}" + f"{_html_escape(e['name'])}" + f"{_html_escape(e.get('first_seen', '?'))}" + f"{e.get('failure_count', '?')}" ) parts.append("") @@ -407,11 +419,11 @@ def generate_consolidated_html( ) for e in agg["all_flaky_tests"]: parts.append( - f'{_html_escape(e["test_type"])}' - f'{_html_escape(e["matrix_label"])}' - f'{_html_escape(e["suite"])}' - f'{_html_escape(e["name"])}' - f'{e.get("retry_count", "?")}' + f"{_html_escape(e['test_type'])}" + f"{_html_escape(e['matrix_label'])}" + f"{_html_escape(e['suite'])}" + f"{_html_escape(e['name'])}" + f"{e.get('retry_count', '?')}" ) parts.append("") @@ -480,44 +492,54 @@ def update_index(s3_index_uri, date_str, consolidated, output_dir): # Main # --------------------------------------------------------------------------- + def main(): parser = argparse.ArgumentParser( description="Aggregate per-matrix nightly test summaries" ) parser.add_argument( - "--s3-summaries-prefix", default="", + "--s3-summaries-prefix", + default="", help="S3 prefix for per-matrix JSON summaries (e.g., s3://bucket/.../summaries/2026-04-13/)", ) parser.add_argument( - "--s3-reports-prefix", default="", + "--s3-reports-prefix", + default="", help="S3 prefix where per-matrix HTML reports live (for linking)", ) parser.add_argument( - "--s3-output-uri", default="", + "--s3-output-uri", + default="", help="S3 URI to upload the consolidated JSON", ) parser.add_argument( - "--s3-html-output-uri", default="", + "--s3-html-output-uri", + default="", help="S3 URI to upload the consolidated HTML report", ) 
parser.add_argument( - "--s3-index-uri", default="", + "--s3-index-uri", + default="", help="S3 URI for the index.json that tracks all available dates (read + write)", ) parser.add_argument( - "--s3-dashboard-uri", default="", + "--s3-dashboard-uri", + default="", help="S3 URI to upload the dashboard HTML (e.g., s3://bucket/.../dashboard/index.html)", ) parser.add_argument( - "--dashboard-dir", default="", + "--dashboard-dir", + default="", help="Local directory containing dashboard files to upload", ) parser.add_argument( - "--local-summaries-dir", default="", + "--local-summaries-dir", + default="", help="Local directory with JSON summaries (alternative to S3, for testing)", ) parser.add_argument( - "--output-dir", default="aggregate-output", + "--output-dir", + default="aggregate-output", help="Local directory to write output files", ) parser.add_argument( @@ -527,7 +549,9 @@ def main(): ) parser.add_argument("--branch", default="main", help="Branch name") parser.add_argument( - "--github-run-url", default="", help="URL to the GitHub Actions run", + "--github-run-url", + default="", + help="URL to the GitHub Actions run", ) args = parser.parse_args() @@ -541,13 +565,17 @@ def main(): download_dir = output_dir / "downloaded_summaries" summaries = download_summaries(args.s3_summaries_prefix, download_dir) else: - print("ERROR: Provide --s3-summaries-prefix or --local-summaries-dir", - file=sys.stderr) + print( + "ERROR: Provide --s3-summaries-prefix or --local-summaries-dir", + file=sys.stderr, + ) return 1 if not summaries: - print("WARNING: No summaries found. Generating empty report.", - file=sys.stderr) + print( + "WARNING: No summaries found. 
Generating empty report.", + file=sys.stderr, + ) print(f"Loaded {len(summaries)} matrix summary file(s)") @@ -562,7 +590,10 @@ def main(): # ---- Step 3: Generate outputs ---- consolidated = generate_consolidated_json( - agg, args.date, args.branch, args.github_run_url, + agg, + args.date, + args.branch, + args.github_run_url, ) json_path = output_dir / "consolidated_summary.json" @@ -570,7 +601,10 @@ def main(): print(f"Consolidated JSON written to {json_path}") html_report = generate_consolidated_html( - agg, args.date, args.branch, args.github_run_url, + agg, + args.date, + args.branch, + args.github_run_url, args.s3_reports_prefix, ) html_path = output_dir / "consolidated_report.html" @@ -586,7 +620,10 @@ def main(): # ---- Step 5: Update index.json ---- if args.s3_index_uri: update_index( - args.s3_index_uri, args.date, consolidated, output_dir, + args.s3_index_uri, + args.date, + consolidated, + output_dir, ) # ---- Step 6: Upload dashboard ---- @@ -595,8 +632,10 @@ def main(): if dashboard_file.exists(): s3_upload(str(dashboard_file), args.s3_dashboard_uri) else: - print(f"WARNING: Dashboard not found at {dashboard_file}", - file=sys.stderr) + print( + f"WARNING: Dashboard not found at {dashboard_file}", + file=sys.stderr, + ) return 0 diff --git a/ci/utils/nightly_report.py b/ci/utils/nightly_report.py index 40e2e65798..c098b4d66c 100755 --- a/ci/utils/nightly_report.py +++ b/ci/utils/nightly_report.py @@ -54,6 +54,7 @@ # JUnit XML parsing # --------------------------------------------------------------------------- + def parse_junit_xml(xml_path): """Parse a JUnit XML file and return a list of test result dicts.""" results = [] @@ -100,15 +101,17 @@ def parse_junit_xml(xml_path): status = "passed" message = "" - results.append({ - "suite": suite_name, - "classname": classname, - "name": name, - "status": status, - "time": time_taken, - "message": message, - "source_file": str(xml_path), - }) + results.append( + { + "suite": suite_name, + "classname": 
classname, + "name": name, + "status": status, + "time": time_taken, + "message": message, + "source_file": str(xml_path), + } + ) return results @@ -126,6 +129,7 @@ def collect_all_results(results_dir): # Classification # --------------------------------------------------------------------------- + def classify_failures(results): """ Classify test results into passed, failed, flaky, skipped, and error. @@ -173,6 +177,7 @@ def classify_failures(results): # History management # --------------------------------------------------------------------------- + def load_history(history_path): """Load failure history from a local JSON file.""" try: @@ -257,14 +262,16 @@ def update_history(history, classified, sha, date_str): rec["status"] = "resolved" rec["resolved_date"] = date_str rec["resolved_sha"] = sha - resolved_tests.append({ - "suite": rec["suite"], - "classname": rec["classname"], - "name": rec["name"], - "first_seen": rec["first_seen_date"], - "failure_count": rec["failure_count"], - "was_flaky": rec.get("is_flaky", False), - }) + resolved_tests.append( + { + "suite": rec["suite"], + "classname": rec["classname"], + "name": rec["name"], + "first_seen": rec["first_seen_date"], + "failure_count": rec["failure_count"], + "was_flaky": rec.get("is_flaky", False), + } + ) return history, new_failures, recurring_failures, resolved_tests @@ -280,9 +287,17 @@ def save_history(history, history_path): # Report generation # --------------------------------------------------------------------------- + def generate_markdown_report( - classified, new_failures, recurring_failures, resolved_tests, history, - test_type="", matrix_label="", sha="", date_str="", + classified, + new_failures, + recurring_failures, + resolved_tests, + history, + test_type="", + matrix_label="", + sha="", + date_str="", ): """Generate a Markdown summary report.""" lines = [] @@ -320,7 +335,9 @@ def generate_markdown_report( lines.append(f"| Flaky (passed on retry) | {total_flaky} |") lines.append(f"| 
Skipped | {total_skipped} |") if resolved_tests: - lines.append(f"| **Stabilized (were failing, now pass)** | **{len(resolved_tests)}** |") + lines.append( + f"| **Stabilized (were failing, now pass)** | **{len(resolved_tests)}** |" + ) lines.append("") # -- New genuine failures (highest priority) -- @@ -331,9 +348,13 @@ def generate_markdown_report( lines.append("|-------|------|-------|") for entry in new_failures: short_msg = ( - entry.get("message", "")[:80].replace("\n", " ").replace("|", "\\|") + entry.get("message", "")[:80] + .replace("\n", " ") + .replace("|", "\\|") + ) + lines.append( + f"| {entry['suite']} | `{entry['name']}` | {short_msg} |" ) - lines.append(f"| {entry['suite']} | `{entry['name']}` | {short_msg} |") lines.append("") # -- Recurring failures -- @@ -344,11 +365,19 @@ def generate_markdown_report( lines.append("|-------|------|------------|---------------|-------|") for entry in recurring_failures: short_msg = ( - entry.get("message", "")[:60].replace("\n", " ").replace("|", "\\|") + entry.get("message", "")[:60] + .replace("\n", " ") + .replace("|", "\\|") ) first_seen = entry.get("first_seen", "unknown") - test_key = f"{entry['suite']}::{entry['classname']}::{entry['name']}" - count = history.get("tests", {}).get(test_key, {}).get("failure_count", "?") + test_key = ( + f"{entry['suite']}::{entry['classname']}::{entry['name']}" + ) + count = ( + history.get("tests", {}) + .get(test_key, {}) + .get("failure_count", "?") + ) lines.append( f"| {entry['suite']} | `{entry['name']}` | {first_seen} | {count} | {short_msg} |" ) @@ -358,8 +387,12 @@ def generate_markdown_report( if resolved_tests: lines.append("## Stabilized Tests (were failing, now passing)") lines.append("") - lines.append("| Suite | Test | Was failing since | Total failure count | Was flaky? |") - lines.append("|-------|------|-------------------|---------------------|------------|") + lines.append( + "| Suite | Test | Was failing since | Total failure count | Was flaky? 
|" + ) + lines.append( + "|-------|------|-------------------|---------------------|------------|" + ) for entry in resolved_tests: flaky_badge = "Yes" if entry.get("was_flaky") else "No" lines.append( @@ -376,7 +409,9 @@ def generate_markdown_report( lines.append("|-------|------|----------------|") for entry in classified["flaky"]: retry_count = entry.get("retry_count", "?") - lines.append(f"| {entry['suite']} | `{entry['name']}` | {retry_count} |") + lines.append( + f"| {entry['suite']} | `{entry['name']}` | {retry_count} |" + ) lines.append("") # -- Detailed errors -- @@ -405,8 +440,14 @@ def generate_markdown_report( def generate_json_summary( - classified, new_failures, recurring_failures, resolved_tests, - test_type="", matrix_label="", sha="", date_str="", + classified, + new_failures, + recurring_failures, + resolved_tests, + test_type="", + matrix_label="", + sha="", + date_str="", ): """Generate a JSON summary for downstream tools (Slack notifier, dashboard).""" return { @@ -470,6 +511,7 @@ def generate_json_summary( # HTML report # --------------------------------------------------------------------------- + def _html_escape(text): """Escape HTML special characters.""" return ( @@ -481,8 +523,15 @@ def _html_escape(text): def generate_html_report( - classified, new_failures, recurring_failures, resolved_tests, history, - test_type="", matrix_label="", sha="", date_str="", + classified, + new_failures, + recurring_failures, + resolved_tests, + history, + test_type="", + matrix_label="", + sha="", + date_str="", ): """Generate a self-contained HTML report with detailed failure info.""" total_passed = len(classified["passed"]) @@ -566,7 +615,9 @@ def generate_html_report( if sha: meta_parts.append(f"Commit: {_html_escape(sha[:12])}") if matrix_label: - meta_parts.append(f"Matrix: {_html_escape(matrix_label)}") + meta_parts.append( + f"Matrix: {_html_escape(matrix_label)}" + ) parts.append("  |  ".join(meta_parts)) parts.append(f""" @@ -582,23 +633,23 @@ 
def generate_html_report( # --- New failures --- if new_failures: - parts.append('

New Failures

') - parts.append('') + parts.append("

New Failures

SuiteTestError
") + parts.append("") for e in new_failures: msg = _html_escape(e.get("message", "")) short = _html_escape(e.get("message", "")[:100]) parts.append( - f'' - f'" + f"' - f'' ) parts.append("
SuiteTestError
{_html_escape(e["suite"])}{_html_escape(e["name"])} ' + f"
{_html_escape(e['suite'])}{_html_escape(e['name'])} " f'NEW
{short}' + f"
{short}" f'
{msg}
") # --- Recurring failures --- if recurring_failures: - parts.append('

Recurring Failures

') + parts.append("

Recurring Failures

") parts.append( "" "" @@ -608,22 +659,24 @@ def generate_html_report( short = _html_escape(e.get("message", "")[:100]) first_seen = _html_escape(e.get("first_seen", "unknown")) test_key = f"{e['suite']}::{e['classname']}::{e['name']}" - count = history.get("tests", {}).get(test_key, {}).get( - "failure_count", "?" + count = ( + history.get("tests", {}) + .get(test_key, {}) + .get("failure_count", "?") ) parts.append( - f'' - f'" + f"' f"" - f'' ) parts.append("
SuiteTestFirst SeenCountError
{_html_escape(e["suite"])}{_html_escape(e["name"])} ' + f"
{_html_escape(e['suite'])}{_html_escape(e['name'])} " f'RECURRING{first_seen}{count}
{short}' + f"
{short}" f'
{msg}
") # --- Stabilized --- if resolved_tests: - parts.append('

Stabilized Tests

') + parts.append("

Stabilized Tests

") parts.append( "" "" @@ -631,25 +684,25 @@ def generate_html_report( for e in resolved_tests: flaky_tag = "Yes" if e.get("was_flaky") else "No" parts.append( - f'' - f'" + f"' - f'' - f'' + f"" + f"" f"" ) parts.append("
SuiteTestFailing SinceFailure CountWas Flaky?
{_html_escape(e["suite"])}{_html_escape(e["name"])} ' + f"
{_html_escape(e['suite'])}{_html_escape(e['name'])} " f'FIXED{_html_escape(e.get("first_seen", "?"))}{e.get("failure_count", "?")}{_html_escape(e.get('first_seen', '?'))}{e.get('failure_count', '?')}{flaky_tag}
") # --- Flaky --- if classified["flaky"]: - parts.append('

Flaky Tests (passed on retry)

') + parts.append("

Flaky Tests (passed on retry)

") parts.append("") for e in classified["flaky"]: parts.append( - f'' - f'" + f"' - f'' + f"" ) parts.append("
SuiteTestRetries
{_html_escape(e["suite"])}{_html_escape(e["name"])} ' + f"
{_html_escape(e['suite'])}{_html_escape(e['name'])} " f'FLAKY{e.get("retry_count", "?")}
{e.get('retry_count', '?')}
") @@ -661,17 +714,19 @@ def generate_html_report( msg = _html_escape(e.get("message", "").strip()) parts.append( f'

' - f'{_html_escape(e["classname"])}::{_html_escape(e["name"])}

' + f"{_html_escape(e['classname'])}::{_html_escape(e['name'])}" f'

' - f'Suite: {_html_escape(e["suite"])}  |  ' - f'Source: {_html_escape(e["source_file"])}

' + f"Suite: {_html_escape(e['suite'])}  |  " + f"Source: {_html_escape(e['source_file'])}

" ) if msg: parts.append(f'
{msg}
') parts.append("") if not all_failures and not classified["flaky"] and not resolved_tests: - parts.append('

All tests passed! No failures or flaky tests detected.

') + parts.append( + '

All tests passed! No failures or flaky tests detected.

' + ) parts.append("") return "\n".join(parts) @@ -681,42 +736,50 @@ def generate_html_report( # Main # --------------------------------------------------------------------------- + def main(): parser = argparse.ArgumentParser( description="Generate nightly test failure report from JUnit XML results" ) parser.add_argument( - "--results-dir", required=True, + "--results-dir", + required=True, help="Directory containing JUnit XML test result files", ) parser.add_argument( - "--output-dir", default="report-output", + "--output-dir", + default="report-output", help="Directory to write report files to", ) parser.add_argument( - "--sha", default=os.environ.get("GITHUB_SHA", "unknown"), + "--sha", + default=os.environ.get("GITHUB_SHA", "unknown"), help="Git commit SHA for this run", ) parser.add_argument( - "--date", default=datetime.now(timezone.utc).strftime("%Y-%m-%d"), + "--date", + default=datetime.now(timezone.utc).strftime("%Y-%m-%d"), help="Date for this run (YYYY-MM-DD)", ) parser.add_argument( - "--test-type", default="unknown", + "--test-type", + default="unknown", help=( "Test type identifier (e.g., cpp, python, wheel-python, " "wheel-server, notebooks)" ), ) parser.add_argument( - "--matrix-label", default="", + "--matrix-label", + default="", help=( "Matrix combination label (e.g., cuda12.9-py3.12-x86_64). " "Included in reports and JSON summary to identify the CI job." ), ) parser.add_argument( - "--s3-history-uri", default="", + "--s3-history-uri", + default="", help=( "S3 URI for persistent failure history JSON. " "Downloaded before analysis, uploaded after update. " @@ -725,7 +788,8 @@ def main(): ), ) parser.add_argument( - "--s3-summary-uri", default="", + "--s3-summary-uri", + default="", help=( "S3 URI to upload this run's JSON snapshot for aggregation. 
" "Example: s3://bucket/ci_test_reports/nightly/summaries/" @@ -733,7 +797,8 @@ def main(): ), ) parser.add_argument( - "--s3-html-uri", default="", + "--s3-html-uri", + default="", help=( "S3 URI to upload the HTML report. " "Example: s3://bucket/ci_test_reports/nightly/reports/" @@ -780,7 +845,9 @@ def main(): ) if resolved_tests: - print(f"Stabilized: {len(resolved_tests)} previously-failing test(s) now pass") + print( + f"Stabilized: {len(resolved_tests)} previously-failing test(s) now pass" + ) save_history(history, local_history_path) print(f"Updated local history at {local_history_path}") @@ -798,7 +865,11 @@ def main(): ) md_report = generate_markdown_report( - classified, new_failures, recurring_failures, resolved_tests, history, + classified, + new_failures, + recurring_failures, + resolved_tests, + history, **report_kwargs, ) md_path = output_dir / "nightly_report.md" @@ -806,7 +877,11 @@ def main(): print(f"Markdown report written to {md_path}") html_report = generate_html_report( - classified, new_failures, recurring_failures, resolved_tests, history, + classified, + new_failures, + recurring_failures, + resolved_tests, + history, **report_kwargs, ) html_path = output_dir / "nightly_report.html" @@ -814,7 +889,10 @@ def main(): print(f"HTML report written to {html_path}") json_summary = generate_json_summary( - classified, new_failures, recurring_failures, resolved_tests, + classified, + new_failures, + recurring_failures, + resolved_tests, **report_kwargs, ) json_path = output_dir / "nightly_summary.json" @@ -836,10 +914,14 @@ def main(): # ---- Exit code ---- genuine_failures = len(classified["failed"]) + len(classified["error"]) if genuine_failures > 0: - print(f"\nFAILED: {genuine_failures} genuine test failure(s) detected.") + print( + f"\nFAILED: {genuine_failures} genuine test failure(s) detected." 
+ ) return 1 if classified["flaky"]: - print(f"\nWARNING: All tests passed but {len(classified['flaky'])} flaky test(s) detected.") + print( + f"\nWARNING: All tests passed but {len(classified['flaky'])} flaky test(s) detected." + ) else: print("\nAll tests passed.") return 0 diff --git a/ci/utils/s3_helpers.py b/ci/utils/s3_helpers.py index f1f5795661..572b61a409 100644 --- a/ci/utils/s3_helpers.py +++ b/ci/utils/s3_helpers.py @@ -20,7 +20,9 @@ def s3_env(): if os.environ.get("CUOPT_AWS_ACCESS_KEY_ID"): env["AWS_ACCESS_KEY_ID"] = os.environ["CUOPT_AWS_ACCESS_KEY_ID"] if os.environ.get("CUOPT_AWS_SECRET_ACCESS_KEY"): - env["AWS_SECRET_ACCESS_KEY"] = os.environ["CUOPT_AWS_SECRET_ACCESS_KEY"] + env["AWS_SECRET_ACCESS_KEY"] = os.environ[ + "CUOPT_AWS_SECRET_ACCESS_KEY" + ] if os.environ.get("CUOPT_AWS_REGION"): env["AWS_DEFAULT_REGION"] = os.environ["CUOPT_AWS_REGION"] elif "AWS_DEFAULT_REGION" not in env: @@ -34,12 +36,17 @@ def s3_download(s3_uri, local_path): try: subprocess.run( ["aws", "s3", "cp", s3_uri, local_path], - env=env, check=True, capture_output=True, text=True, + env=env, + check=True, + capture_output=True, + text=True, ) print(f"Downloaded {s3_uri}") return True except FileNotFoundError: - print("WARNING: aws CLI not found, skipping S3 download", file=sys.stderr) + print( + "WARNING: aws CLI not found, skipping S3 download", file=sys.stderr + ) return False except subprocess.CalledProcessError as exc: print( @@ -55,15 +62,22 @@ def s3_upload(local_path, s3_uri): try: subprocess.run( ["aws", "s3", "cp", local_path, s3_uri], - env=env, check=True, capture_output=True, text=True, + env=env, + check=True, + capture_output=True, + text=True, ) print(f"Uploaded {local_path} to {s3_uri}") return True except FileNotFoundError: - print("WARNING: aws CLI not found, skipping S3 upload", file=sys.stderr) + print( + "WARNING: aws CLI not found, skipping S3 upload", file=sys.stderr + ) return False except subprocess.CalledProcessError as exc: - print(f"WARNING: S3 
upload failed: {exc.stderr.strip()}", file=sys.stderr) + print( + f"WARNING: S3 upload failed: {exc.stderr.strip()}", file=sys.stderr + ) return False @@ -73,7 +87,10 @@ def s3_list(s3_prefix): try: result = subprocess.run( ["aws", "s3", "ls", s3_prefix], - env=env, check=True, capture_output=True, text=True, + env=env, + check=True, + capture_output=True, + text=True, ) except (FileNotFoundError, subprocess.CalledProcessError) as exc: print(f"WARNING: S3 ls failed: {exc}", file=sys.stderr) diff --git a/ci/utils/send_nightly_summary.sh b/ci/utils/send_nightly_summary.sh index 7c2d16519c..7b39a02cec 100755 --- a/ci/utils/send_nightly_summary.sh +++ b/ci/utils/send_nightly_summary.sh @@ -17,7 +17,7 @@ # Optional environment variables: # GITHUB_RUN_URL - Link to the GitHub Actions run # REPORT_URL - Link to the S3 HTML report -# CUOPT_BRANCH - Branch name (e.g. main, release/26.06) +# CUOPT_BRANCH - Branch name (e.g. main) set -euo pipefail diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 04dc6bb83c..e8000ffbb3 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -58,6 +58,7 @@ dependencies: - pylibraft==26.6.*,>=0.0.0a0 - pyrsistent - pytest-cov +- pytest-rerunfailures - pytest<9.0 - python>=3.11,<3.15 - pyyaml>=6.0.0 diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 21891cc9f2..43bc8996ad 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -58,6 +58,7 @@ dependencies: - pylibraft==26.6.*,>=0.0.0a0 - pyrsistent - pytest-cov +- pytest-rerunfailures - pytest<9.0 - python>=3.11,<3.15 - pyyaml>=6.0.0 diff --git a/conda/environments/all_cuda-131_arch-aarch64.yaml b/conda/environments/all_cuda-131_arch-aarch64.yaml index 89147b18a7..5a53e13d37 100644 --- 
a/conda/environments/all_cuda-131_arch-aarch64.yaml +++ b/conda/environments/all_cuda-131_arch-aarch64.yaml @@ -58,6 +58,7 @@ dependencies: - pylibraft==26.6.*,>=0.0.0a0 - pyrsistent - pytest-cov +- pytest-rerunfailures - pytest<9.0 - python>=3.11,<3.15 - pyyaml>=6.0.0 diff --git a/conda/environments/all_cuda-131_arch-x86_64.yaml b/conda/environments/all_cuda-131_arch-x86_64.yaml index 8df6f28bf7..2efc26c0cb 100644 --- a/conda/environments/all_cuda-131_arch-x86_64.yaml +++ b/conda/environments/all_cuda-131_arch-x86_64.yaml @@ -58,6 +58,7 @@ dependencies: - pylibraft==26.6.*,>=0.0.0a0 - pyrsistent - pytest-cov +- pytest-rerunfailures - pytest<9.0 - python>=3.11,<3.15 - pyyaml>=6.0.0 diff --git a/python/cuopt/cuopt/linear_programming/pyproject.toml b/python/cuopt/cuopt/linear_programming/pyproject.toml index 934b12f547..6e2c59c43c 100644 --- a/python/cuopt/cuopt/linear_programming/pyproject.toml +++ b/python/cuopt/cuopt/linear_programming/pyproject.toml @@ -37,6 +37,7 @@ Source = "https://github.com/nvidia/cuopt" [project.optional-dependencies] test = [ "pytest-cov", + "pytest-rerunfailures", "pytest<9.0", "rapids-logger==0.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/cuopt/pyproject.toml b/python/cuopt/pyproject.toml index eff7e01769..18b6e75276 100644 --- a/python/cuopt/pyproject.toml +++ b/python/cuopt/pyproject.toml @@ -47,6 +47,7 @@ classifiers = [ test = [ "numpy>=1.23.5,<3.0", "pytest-cov", + "pytest-rerunfailures", "pytest<9.0", "rapids-logger==0.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
diff --git a/python/cuopt_self_hosted/pyproject.toml b/python/cuopt_self_hosted/pyproject.toml index 43aa80a5b3..f4a3b75a60 100644 --- a/python/cuopt_self_hosted/pyproject.toml +++ b/python/cuopt_self_hosted/pyproject.toml @@ -37,6 +37,7 @@ classifiers = [ [project.optional-dependencies] test = [ "pytest-cov", + "pytest-rerunfailures", "pytest<9.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/cuopt_server/pyproject.toml b/python/cuopt_server/pyproject.toml index ce96c884be..4f9f141011 100644 --- a/python/cuopt_server/pyproject.toml +++ b/python/cuopt_server/pyproject.toml @@ -48,6 +48,7 @@ test = [ "msgpack==1.1.2", "pexpect", "pytest-cov", + "pytest-rerunfailures", "pytest<9.0", "requests", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. From 6b7605b900cc35fea9f1cb9ab3e61b4f915d62a1 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 13 Apr 2026 12:23:08 -0500 Subject: [PATCH 08/60] Fix nightly-summary job: remove unsupported container-options secrets custom-job.yaml does not support secret references in container-options. Remove them and make nightly_summary.sh gracefully skip when CUOPT_DATASET_S3_URI is not available. 
--- .github/workflows/test.yaml | 7 ------- ci/nightly_summary.sh | 11 +++++------ 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 4b52dbffe3..fcf1c5f42f 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -125,11 +125,4 @@ jobs: node_type: "cpu4" arch: "amd64" container_image: "rapidsai/ci-conda:26.06-latest" - container-options: >- - -e CUOPT_DATASET_S3_URI=${{ secrets.CUOPT_DATASET_S3_URI }} - -e CUOPT_AWS_ACCESS_KEY_ID=${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} - -e CUOPT_AWS_SECRET_ACCESS_KEY=${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} - -e CUOPT_SLACK_WEBHOOK_URL=${{ secrets.CUOPT_SLACK_WEBHOOK_URL }} - -e CUOPT_SLACK_BOT_TOKEN=${{ secrets.CUOPT_SLACK_BOT_TOKEN }} - -e CUOPT_SLACK_CHANNEL_ID=${{ secrets.CUOPT_SLACK_CHANNEL_ID }} script: ci/nightly_summary.sh diff --git a/ci/nightly_summary.sh b/ci/nightly_summary.sh index 93576e1795..53075d18c6 100755 --- a/ci/nightly_summary.sh +++ b/ci/nightly_summary.sh @@ -6,10 +6,8 @@ # consolidated Slack notification. Runs as a post-test job after all # matrix CI jobs finish. # -# Required environment variables: -# CUOPT_DATASET_S3_URI - S3 base URI -# CUOPT_AWS_ACCESS_KEY_ID - AWS credentials -# CUOPT_AWS_SECRET_ACCESS_KEY +# The script needs S3 access. It tries CUOPT_DATASET_S3_URI first, then +# falls back to standard AWS env vars set by aws-actions/configure-aws-credentials. # # Optional: # CUOPT_SLACK_WEBHOOK_URL - sends Slack if set @@ -28,8 +26,9 @@ BRANCH="${RAPIDS_BRANCH:-main}" GITHUB_RUN_URL="${GITHUB_SERVER_URL:-https://github.com}/${GITHUB_REPOSITORY:-NVIDIA/cuopt}/actions/runs/${GITHUB_RUN_ID:-}" if [ -z "${CUOPT_DATASET_S3_URI:-}" ]; then - echo "ERROR: CUOPT_DATASET_S3_URI is not set. Cannot aggregate." >&2 - exit 1 + echo "WARNING: CUOPT_DATASET_S3_URI is not set. Skipping nightly aggregation." >&2 + echo "The per-matrix reports (uploaded by individual test jobs) are still available on S3." 
+ exit 0 fi S3_BASE="${CUOPT_DATASET_S3_URI}ci_test_reports/nightly" From 84069f8b68e3b5fbc045caeb577f066f04f106c6 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 13 Apr 2026 14:32:19 -0500 Subject: [PATCH 09/60] Remove unsupported script-env-secret-4 from test workflow The shared workflows only support 3 secret slots. The Slack webhook is only needed by the nightly-summary aggregation job which uses secrets: inherit. --- .github/workflows/test.yaml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index fcf1c5f42f..7de1f43bbb 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -41,8 +41,6 @@ jobs: script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} - script-env-secret-4-key: CUOPT_SLACK_WEBHOOK_URL - script-env-secret-4-value: ${{ secrets.CUOPT_SLACK_WEBHOOK_URL }} conda-python-tests: uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@main with: @@ -59,8 +57,6 @@ jobs: script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} - script-env-secret-4-key: CUOPT_SLACK_WEBHOOK_URL - script-env-secret-4-value: ${{ secrets.CUOPT_SLACK_WEBHOOK_URL }} wheel-tests-cuopt: uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main with: @@ -76,8 +72,6 @@ jobs: script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} - script-env-secret-4-key: CUOPT_SLACK_WEBHOOK_URL - script-env-secret-4-value: ${{ secrets.CUOPT_SLACK_WEBHOOK_URL }} wheel-tests-cuopt-server: uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main with: @@ -93,8 
+87,6 @@ jobs: script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} - script-env-secret-4-key: CUOPT_SLACK_WEBHOOK_URL - script-env-secret-4-value: ${{ secrets.CUOPT_SLACK_WEBHOOK_URL }} conda-notebook-tests: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main From 9ae08520f5e41b29cf35c7e99be2446a1351e518 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 13 Apr 2026 16:01:02 -0500 Subject: [PATCH 10/60] Add bounce detection and cross-run flaky classification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tests that resolve then fail again within 14 days are recognized as bouncing rather than new failures. After 2+ bounces a test is automatically classified as cross-run flaky. Resolved tests only generate one notification — subsequent passes are silent. --- ci/utils/nightly_report.py | 102 ++++++++++++++++++++++++++++++++----- 1 file changed, 88 insertions(+), 14 deletions(-) diff --git a/ci/utils/nightly_report.py b/ci/utils/nightly_report.py index c098b4d66c..a64f8f5a28 100755 --- a/ci/utils/nightly_report.py +++ b/ci/utils/nightly_report.py @@ -47,7 +47,14 @@ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) from s3_helpers import s3_download, s3_upload # noqa: E402 -EMPTY_HISTORY = {"_schema_version": 1, "tests": {}} +EMPTY_HISTORY = {"_schema_version": 2, "tests": {}} + +# A test that resolves then fails again within this window is considered +# "bouncing" (intermittently flaky) rather than a new failure. +BOUNCE_WINDOW_DAYS = 14 + +# Number of failure/resolve cycles that classify a test as cross-run flaky. 
+BOUNCE_THRESHOLD = 2 # --------------------------------------------------------------------------- @@ -190,12 +197,37 @@ def load_history(history_path): return dict(EMPTY_HISTORY) +def _days_between(date_a, date_b): + """Return absolute number of days between two YYYY-MM-DD strings.""" + try: + a = datetime.strptime(date_a, "%Y-%m-%d") + b = datetime.strptime(date_b, "%Y-%m-%d") + return abs((a - b).days) + except (ValueError, TypeError): + return 999 + + +def _is_recent_resolve(rec, date_str): + """Check if a test was resolved recently (within bounce window).""" + resolved_date = rec.get("resolved_date", "") + if not resolved_date: + return False + return _days_between(resolved_date, date_str) <= BOUNCE_WINDOW_DAYS + + def update_history(history, classified, sha, date_str): """ Update failure history with this run's results. Returns (history, new_failures, recurring_failures, resolved_tests). - resolved_tests = previously active failures that passed this run (stabilized). + + Classification logic: + - "new failure": never seen before (no history entry at all) + - "recurring": was already active (failing on previous runs) + - "bouncing": was resolved recently but failed again — reactivated + as recurring (not new), and marked cross-run flaky after 2+ bounces + - "resolved": was active, now passes — notified once, then silent + on subsequent passes """ tests = history.setdefault("tests", {}) new_failures = [] @@ -206,14 +238,46 @@ def update_history(history, classified, sha, date_str): for entry in classified["failed"] + classified["error"]: test_key = f"{entry['suite']}::{entry['classname']}::{entry['name']}" - if test_key in tests and tests[test_key]["status"] == "active": - tests[test_key]["last_seen_date"] = date_str - tests[test_key]["last_seen_sha"] = sha - tests[test_key]["failure_count"] += 1 - recurring_failures.append( - {**entry, "first_seen": tests[test_key]["first_seen_date"]} - ) + if test_key in tests: + rec = tests[test_key] + + if rec["status"] == 
"active": + # Still failing — bump count + rec["last_seen_date"] = date_str + rec["last_seen_sha"] = sha + rec["failure_count"] += 1 + recurring_failures.append( + {**entry, "first_seen": rec["first_seen_date"]} + ) + elif rec["status"] == "resolved" and _is_recent_resolve( + rec, date_str + ): + # Bouncing: resolved recently but failed again. + # Reactivate as recurring, not new. Track the bounce. + rec["status"] = "active" + rec["last_seen_date"] = date_str + rec["last_seen_sha"] = sha + rec["failure_count"] += 1 + rec["bounce_count"] = rec.get("bounce_count", 0) + 1 + if rec["bounce_count"] >= BOUNCE_THRESHOLD: + rec["is_flaky"] = True + recurring_failures.append( + { + **entry, + "first_seen": rec["first_seen_date"], + "is_bouncing": True, + } + ) + else: + # Resolved long ago — treat as new cycle but keep history + rec["status"] = "active" + rec["last_seen_date"] = date_str + rec["last_seen_sha"] = sha + rec["failure_count"] += 1 + rec["bounce_count"] = rec.get("bounce_count", 0) + 1 + new_failures.append(entry) else: + # Truly new — never seen before tests[test_key] = { "suite": entry["suite"], "classname": entry["classname"], @@ -224,18 +288,24 @@ def update_history(history, classified, sha, date_str): "last_seen_sha": sha, "failure_count": 1, "is_flaky": False, + "bounce_count": 0, "status": "active", } new_failures.append(entry) - # --- Flaky tests --- + # --- Flaky tests (passed on retry within this run) --- for entry in classified["flaky"]: test_key = f"{entry['suite']}::{entry['classname']}::{entry['name']}" if test_key in tests: - tests[test_key]["last_seen_date"] = date_str - tests[test_key]["last_seen_sha"] = sha - tests[test_key]["failure_count"] += 1 - tests[test_key]["is_flaky"] = True + rec = tests[test_key] + rec["last_seen_date"] = date_str + rec["last_seen_sha"] = sha + rec["failure_count"] += 1 + rec["is_flaky"] = True + # If it was resolved, reactivate — it's still unstable + if rec["status"] == "resolved": + rec["status"] = "active" + 
rec["bounce_count"] = rec.get("bounce_count", 0) + 1 else: tests[test_key] = { "suite": entry["suite"], @@ -247,6 +317,7 @@ def update_history(history, classified, sha, date_str): "last_seen_sha": sha, "failure_count": 1, "is_flaky": True, + "bounce_count": 0, "status": "active", } @@ -269,9 +340,12 @@ def update_history(history, classified, sha, date_str): "name": rec["name"], "first_seen": rec["first_seen_date"], "failure_count": rec["failure_count"], + "bounce_count": rec.get("bounce_count", 0), "was_flaky": rec.get("is_flaky", False), } ) + # If already "resolved" and passes again — no notification. + # The resolved notification was sent once when it first stabilized. return history, new_failures, recurring_failures, resolved_tests From d34bf28e9359d8bb507766f42fdf9b8495d10784 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 13 Apr 2026 16:03:38 -0500 Subject: [PATCH 11/60] Make bounce window and threshold configurable via env vars CUOPT_BOUNCE_WINDOW_DAYS (default 14) and CUOPT_BOUNCE_THRESHOLD (default 2) can now be set as environment variables to tune flaky test detection without code changes. --- ci/utils/nightly_report.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/utils/nightly_report.py b/ci/utils/nightly_report.py index a64f8f5a28..2bd23b1f18 100755 --- a/ci/utils/nightly_report.py +++ b/ci/utils/nightly_report.py @@ -51,10 +51,10 @@ # A test that resolves then fails again within this window is considered # "bouncing" (intermittently flaky) rather than a new failure. -BOUNCE_WINDOW_DAYS = 14 +BOUNCE_WINDOW_DAYS = int(os.environ.get("CUOPT_BOUNCE_WINDOW_DAYS", 14)) # Number of failure/resolve cycles that classify a test as cross-run flaky. 
-BOUNCE_THRESHOLD = 2 +BOUNCE_THRESHOLD = int(os.environ.get("CUOPT_BOUNCE_THRESHOLD", 2)) # --------------------------------------------------------------------------- From 2eae30b4ab8edfdf9d6611176bbb9c6aef5cd183 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 13 Apr 2026 17:47:23 -0500 Subject: [PATCH 12/60] Convert nightly-summary to inline job for secret access The custom-job.yaml reusable workflow does not expose secrets as env vars. Convert nightly-summary to an inline job that directly sets all required secrets (S3, Slack) in the step environment. --- .github/workflows/test.yaml | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 7de1f43bbb..3ced840676 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -107,14 +107,21 @@ jobs: - wheel-tests-cuopt - wheel-tests-cuopt-server - conda-notebook-tests - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main - with: - build_type: ${{ inputs.build_type }} - branch: ${{ inputs.branch }} - date: ${{ inputs.date }} - sha: ${{ inputs.sha }} - node_type: "cpu4" - arch: "amd64" - container_image: "rapidsai/ci-conda:26.06-latest" - script: ci/nightly_summary.sh + runs-on: linux-amd64-cpu4 + steps: + - uses: actions/checkout@v6 + with: + ref: ${{ inputs.sha }} + - name: Install dependencies + run: pip install awscli + - name: Run nightly summary + env: + CUOPT_DATASET_S3_URI: ${{ secrets.CUOPT_DATASET_S3_URI }} + CUOPT_AWS_ACCESS_KEY_ID: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} + CUOPT_AWS_SECRET_ACCESS_KEY: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} + CUOPT_SLACK_WEBHOOK_URL: ${{ secrets.CUOPT_SLACK_WEBHOOK_URL }} + CUOPT_SLACK_BOT_TOKEN: ${{ secrets.CUOPT_SLACK_BOT_TOKEN }} + CUOPT_SLACK_CHANNEL_ID: ${{ secrets.CUOPT_SLACK_CHANNEL_ID }} + RAPIDS_BUILD_TYPE: ${{ inputs.build_type }} + RAPIDS_BRANCH: ${{ inputs.branch }} + run: bash 
ci/nightly_summary.sh From e977784e9dc3466f59985dbb78fab0128ca2d48c Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 13 Apr 2026 17:48:35 -0500 Subject: [PATCH 13/60] Fix S3 auth: prefer role-based credentials over CUOPT_AWS overrides In CI, aws-actions/configure-aws-credentials sets role-based tokens (AWS_ACCESS_KEY_ID + AWS_SESSION_TOKEN). The CUOPT_AWS_* overrides were replacing these with static keys that lack the session token, causing InvalidToken errors. Now only fall back to CUOPT_AWS_* when standard AWS credentials are not already set. --- ci/utils/s3_helpers.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/ci/utils/s3_helpers.py b/ci/utils/s3_helpers.py index 572b61a409..a550a869d5 100644 --- a/ci/utils/s3_helpers.py +++ b/ci/utils/s3_helpers.py @@ -15,14 +15,21 @@ def s3_env(): - """Build env dict with CUOPT AWS credentials mapped to standard AWS vars.""" + """Build env dict for AWS CLI calls. + + Prefers credentials already set by aws-actions/configure-aws-credentials + (role-based tokens via AWS_ACCESS_KEY_ID / AWS_SESSION_TOKEN). Falls + back to CUOPT_AWS_* overrides only when standard AWS vars are absent. 
+ """ env = os.environ.copy() - if os.environ.get("CUOPT_AWS_ACCESS_KEY_ID"): - env["AWS_ACCESS_KEY_ID"] = os.environ["CUOPT_AWS_ACCESS_KEY_ID"] - if os.environ.get("CUOPT_AWS_SECRET_ACCESS_KEY"): - env["AWS_SECRET_ACCESS_KEY"] = os.environ[ - "CUOPT_AWS_SECRET_ACCESS_KEY" - ] + # Only override if standard AWS credentials are not already configured + if not os.environ.get("AWS_ACCESS_KEY_ID"): + if os.environ.get("CUOPT_AWS_ACCESS_KEY_ID"): + env["AWS_ACCESS_KEY_ID"] = os.environ["CUOPT_AWS_ACCESS_KEY_ID"] + if os.environ.get("CUOPT_AWS_SECRET_ACCESS_KEY"): + env["AWS_SECRET_ACCESS_KEY"] = os.environ[ + "CUOPT_AWS_SECRET_ACCESS_KEY" + ] if os.environ.get("CUOPT_AWS_REGION"): env["AWS_DEFAULT_REGION"] = os.environ["CUOPT_AWS_REGION"] elif "AWS_DEFAULT_REGION" not in env: From 124aeb2ea6237eed3bc1e77a5ec2cb452a4cb340 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 14 Apr 2026 09:48:39 -0500 Subject: [PATCH 14/60] Fix S3 auth: use CUOPT_AWS static keys and unset session token The cuOpt S3 bucket requires CUOPT_AWS_* static credentials. The role-based session token from aws-actions/configure-aws-credentials was causing InvalidToken errors. Always override with CUOPT_AWS_* and unset AWS_SESSION_TOKEN, matching the pattern in datasets/*.sh. --- ci/utils/s3_helpers.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/ci/utils/s3_helpers.py b/ci/utils/s3_helpers.py index a550a869d5..be1d2c872b 100644 --- a/ci/utils/s3_helpers.py +++ b/ci/utils/s3_helpers.py @@ -15,21 +15,23 @@ def s3_env(): - """Build env dict for AWS CLI calls. + """Build env dict for AWS CLI calls using CUOPT-specific credentials. - Prefers credentials already set by aws-actions/configure-aws-credentials - (role-based tokens via AWS_ACCESS_KEY_ID / AWS_SESSION_TOKEN). Falls - back to CUOPT_AWS_* overrides only when standard AWS vars are absent. + The cuOpt S3 bucket requires explicit CUOPT_AWS_* static credentials. 
+ Role-based credentials from aws-actions/configure-aws-credentials do not + have access. We override AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY with + the CUOPT_* values and unset AWS_SESSION_TOKEN to avoid mixing with + role-based session tokens (matching the pattern in datasets/*.sh). """ env = os.environ.copy() - # Only override if standard AWS credentials are not already configured - if not os.environ.get("AWS_ACCESS_KEY_ID"): - if os.environ.get("CUOPT_AWS_ACCESS_KEY_ID"): - env["AWS_ACCESS_KEY_ID"] = os.environ["CUOPT_AWS_ACCESS_KEY_ID"] - if os.environ.get("CUOPT_AWS_SECRET_ACCESS_KEY"): - env["AWS_SECRET_ACCESS_KEY"] = os.environ[ - "CUOPT_AWS_SECRET_ACCESS_KEY" - ] + if os.environ.get("CUOPT_AWS_ACCESS_KEY_ID"): + env["AWS_ACCESS_KEY_ID"] = os.environ["CUOPT_AWS_ACCESS_KEY_ID"] + if os.environ.get("CUOPT_AWS_SECRET_ACCESS_KEY"): + env["AWS_SECRET_ACCESS_KEY"] = os.environ[ + "CUOPT_AWS_SECRET_ACCESS_KEY" + ] + # Unset session token to avoid mixing role-based tokens with static keys + env.pop("AWS_SESSION_TOKEN", None) if os.environ.get("CUOPT_AWS_REGION"): env["AWS_DEFAULT_REGION"] = os.environ["CUOPT_AWS_REGION"] elif "AWS_DEFAULT_REGION" not in env: From 338b7bbf21099ae853075e471c56ee03921020d0 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 14 Apr 2026 11:26:18 -0500 Subject: [PATCH 15/60] Extract nightly-summary into reusable workflow Move the nightly-summary job out of test.yaml into its own nightly-summary.yaml reusable workflow. Runs in a python:3.12-slim container to avoid PEP 668 externally-managed-environment errors when installing awscli. Also adds workflow_dispatch trigger so the summary can be re-run manually against an earlier test run. 
--- .github/workflows/nightly-summary.yaml | 69 ++++++++++++++++++++++++++ .github/workflows/test.yaml | 30 +++++------ 2 files changed, 81 insertions(+), 18 deletions(-) create mode 100644 .github/workflows/nightly-summary.yaml diff --git a/.github/workflows/nightly-summary.yaml b/.github/workflows/nightly-summary.yaml new file mode 100644 index 0000000000..166853a1f3 --- /dev/null +++ b/.github/workflows/nightly-summary.yaml @@ -0,0 +1,69 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +name: nightly-summary + +on: + workflow_dispatch: + inputs: + branch: + description: "Branch name the run targets" + required: true + type: string + default: main + sha: + description: "Full git commit SHA to check out" + required: true + type: string + build_type: + description: "Build type (nightly, pull-request, branch)" + required: true + type: string + default: nightly + workflow_call: + inputs: + branch: + required: true + type: string + sha: + required: true + type: string + build_type: + required: true + type: string + secrets: + CUOPT_DATASET_S3_URI: + required: true + CUOPT_AWS_ACCESS_KEY_ID: + required: true + CUOPT_AWS_SECRET_ACCESS_KEY: + required: true + CUOPT_SLACK_WEBHOOK_URL: + required: false + CUOPT_SLACK_BOT_TOKEN: + required: false + CUOPT_SLACK_CHANNEL_ID: + required: false + +jobs: + nightly-summary: + runs-on: linux-amd64-cpu4 + container: + image: python:3.12-slim + steps: + - uses: actions/checkout@v6 + with: + ref: ${{ inputs.sha }} + - name: Install dependencies + run: pip install awscli + - name: Run nightly summary + env: + CUOPT_DATASET_S3_URI: ${{ secrets.CUOPT_DATASET_S3_URI }} + CUOPT_AWS_ACCESS_KEY_ID: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} + CUOPT_AWS_SECRET_ACCESS_KEY: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} + CUOPT_SLACK_WEBHOOK_URL: ${{ secrets.CUOPT_SLACK_WEBHOOK_URL }} + CUOPT_SLACK_BOT_TOKEN: ${{ secrets.CUOPT_SLACK_BOT_TOKEN }} + 
CUOPT_SLACK_CHANNEL_ID: ${{ secrets.CUOPT_SLACK_CHANNEL_ID }} + RAPIDS_BUILD_TYPE: ${{ inputs.build_type }} + RAPIDS_BRANCH: ${{ inputs.branch }} + run: bash ci/nightly_summary.sh diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 3ced840676..097f607244 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -107,21 +107,15 @@ jobs: - wheel-tests-cuopt - wheel-tests-cuopt-server - conda-notebook-tests - runs-on: linux-amd64-cpu4 - steps: - - uses: actions/checkout@v6 - with: - ref: ${{ inputs.sha }} - - name: Install dependencies - run: pip install awscli - - name: Run nightly summary - env: - CUOPT_DATASET_S3_URI: ${{ secrets.CUOPT_DATASET_S3_URI }} - CUOPT_AWS_ACCESS_KEY_ID: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} - CUOPT_AWS_SECRET_ACCESS_KEY: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} - CUOPT_SLACK_WEBHOOK_URL: ${{ secrets.CUOPT_SLACK_WEBHOOK_URL }} - CUOPT_SLACK_BOT_TOKEN: ${{ secrets.CUOPT_SLACK_BOT_TOKEN }} - CUOPT_SLACK_CHANNEL_ID: ${{ secrets.CUOPT_SLACK_CHANNEL_ID }} - RAPIDS_BUILD_TYPE: ${{ inputs.build_type }} - RAPIDS_BRANCH: ${{ inputs.branch }} - run: bash ci/nightly_summary.sh + uses: ./.github/workflows/nightly-summary.yaml + with: + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + build_type: ${{ inputs.build_type }} + secrets: + CUOPT_DATASET_S3_URI: ${{ secrets.CUOPT_DATASET_S3_URI }} + CUOPT_AWS_ACCESS_KEY_ID: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} + CUOPT_AWS_SECRET_ACCESS_KEY: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} + CUOPT_SLACK_WEBHOOK_URL: ${{ secrets.CUOPT_SLACK_WEBHOOK_URL }} + CUOPT_SLACK_BOT_TOKEN: ${{ secrets.CUOPT_SLACK_BOT_TOKEN }} + CUOPT_SLACK_CHANNEL_ID: ${{ secrets.CUOPT_SLACK_CHANNEL_ID }} From 15641c508a1839b3354ee343b2be8a466c78cf6c Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 14 Apr 2026 11:52:59 -0500 Subject: [PATCH 16/60] Add curl to nightly-summary container for Slack notifications The python:3.12-slim image doesn't include curl, which is needed by 
send_consolidated_summary.sh for Slack webhook and file upload. --- .github/workflows/nightly-summary.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/nightly-summary.yaml b/.github/workflows/nightly-summary.yaml index 166853a1f3..214fe84c97 100644 --- a/.github/workflows/nightly-summary.yaml +++ b/.github/workflows/nightly-summary.yaml @@ -55,7 +55,9 @@ jobs: with: ref: ${{ inputs.sha }} - name: Install dependencies - run: pip install awscli + run: | + apt-get update && apt-get install -y --no-install-recommends curl + pip install awscli - name: Run nightly summary env: CUOPT_DATASET_S3_URI: ${{ secrets.CUOPT_DATASET_S3_URI }} From 477b273825445cac455b0a53e1cc3be273739f42 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 14 Apr 2026 12:52:23 -0500 Subject: [PATCH 17/60] Improve nightly Slack reporting and thirdparty test visibility - Filter consolidated.json from S3 aggregation to fix "unknown" entry - Migrate Slack file upload from deprecated files.upload to getUploadURLExternal + completeUploadExternal - Chunk Slack messages into header/grid/details/links to stay within block and character limits - Remove S3 link from Slack in favor of HTML file attachment - Add --junitxml to Pyomo, CvxPy, and PuLP thirdparty test scripts so failures appear in nightly reports - Export RAPIDS_TESTS_DIR from test_wheel_cuopt.sh for subprocesses --- ci/nightly_summary.sh | 1 - ci/test_wheel_cuopt.sh | 1 + ci/thirdparty-testing/run_cvxpy_tests.sh | 4 + ci/thirdparty-testing/run_pulp_tests.sh | 4 + ci/thirdparty-testing/run_pyomo_tests.sh | 4 + ci/utils/aggregate_nightly.py | 5 +- ci/utils/send_consolidated_summary.sh | 275 +++++++++++++---------- 7 files changed, 177 insertions(+), 117 deletions(-) diff --git a/ci/nightly_summary.sh b/ci/nightly_summary.sh index 53075d18c6..c0aab7a52d 100755 --- a/ci/nightly_summary.sh +++ b/ci/nightly_summary.sh @@ -63,7 +63,6 @@ if [ -n "${CUOPT_SLACK_WEBHOOK_URL:-}" ] && [ 
"${RAPIDS_BUILD_TYPE:-}" = "nightl SLACK_WEBHOOK_URL="${CUOPT_SLACK_WEBHOOK_URL}" \ SLACK_BOT_TOKEN="${CUOPT_SLACK_BOT_TOKEN:-}" \ SLACK_CHANNEL_ID="${CUOPT_SLACK_CHANNEL_ID:-}" \ - REPORT_URL="${S3_CONSOLIDATED_HTML}" \ bash "${SCRIPT_DIR}/utils/send_consolidated_summary.sh" fi diff --git a/ci/test_wheel_cuopt.sh b/ci/test_wheel_cuopt.sh index 5d002731b0..878db67594 100755 --- a/ci/test_wheel_cuopt.sh +++ b/ci/test_wheel_cuopt.sh @@ -64,6 +64,7 @@ RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" export RAPIDS_DATASET_ROOT_DIR RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} +export RAPIDS_TESTS_DIR mkdir -p "${RAPIDS_TESTS_DIR}" EXITCODE=0 diff --git a/ci/thirdparty-testing/run_cvxpy_tests.sh b/ci/thirdparty-testing/run_cvxpy_tests.sh index c336f6a800..4b874fc4f0 100755 --- a/ci/thirdparty-testing/run_cvxpy_tests.sh +++ b/ci/thirdparty-testing/run_cvxpy_tests.sh @@ -32,10 +32,14 @@ python -m pip install \ # ensure that environment is still consistent (i.e. cvxpy requirements do not conflict with cuopt's) pip check +RAPIDS_TESTS_DIR="${RAPIDS_TESTS_DIR:-${PWD}/test-results}" +mkdir -p "${RAPIDS_TESTS_DIR}" + echo "running 'cvxpy' tests" timeout 3m python -m pytest \ --verbose \ --capture=no \ --error-for-skips \ + --junitxml="${RAPIDS_TESTS_DIR}/junit-thirdparty-cvxpy.xml" \ -k "TestCUOPT" \ ./cvxpy/tests/test_conic_solvers.py diff --git a/ci/thirdparty-testing/run_pulp_tests.sh b/ci/thirdparty-testing/run_pulp_tests.sh index f9cb0ca8a5..2c26db7a23 100755 --- a/ci/thirdparty-testing/run_pulp_tests.sh +++ b/ci/thirdparty-testing/run_pulp_tests.sh @@ -23,6 +23,9 @@ python -m pip install \ pip check +RAPIDS_TESTS_DIR="${RAPIDS_TESTS_DIR:-${PWD}/test-results}" +mkdir -p "${RAPIDS_TESTS_DIR}" + rapids-logger "running PuLP tests (cuOpt-related)" # PuLP uses pytest; run only tests that reference cuopt/CUOPT # Exit code 5 = no tests collected; then try run_tests.py which detects solvers (including cuopt) @@ -30,6 +33,7 @@ pytest_rc=0 timeout 5m python -m pytest \ 
--verbose \ --capture=no \ + --junitxml="${RAPIDS_TESTS_DIR}/junit-thirdparty-pulp.xml" \ -k "cuopt or CUOPT" \ pulp/tests/ || pytest_rc=$? diff --git a/ci/thirdparty-testing/run_pyomo_tests.sh b/ci/thirdparty-testing/run_pyomo_tests.sh index f50df676c9..d2b0639f6e 100755 --- a/ci/thirdparty-testing/run_pyomo_tests.sh +++ b/ci/thirdparty-testing/run_pyomo_tests.sh @@ -23,11 +23,15 @@ python -m pip install \ pip check +RAPIDS_TESTS_DIR="${RAPIDS_TESTS_DIR:-${PWD}/test-results}" +mkdir -p "${RAPIDS_TESTS_DIR}" + rapids-logger "running Pyomo tests (cuopt_direct / cuOpt-related)" # Run only tests that reference cuopt (cuopt_direct solver) timeout 5m python -m pytest \ --verbose \ --capture=no \ + --junitxml="${RAPIDS_TESTS_DIR}/junit-thirdparty-pyomo.xml" \ -k "cuopt or CUOPT" \ pyomo/solvers/tests/ diff --git a/ci/utils/aggregate_nightly.py b/ci/utils/aggregate_nightly.py index 4517ab3c6a..31e567f487 100644 --- a/ci/utils/aggregate_nightly.py +++ b/ci/utils/aggregate_nightly.py @@ -45,7 +45,10 @@ def download_summaries(s3_prefix, local_dir): local_dir.mkdir(parents=True, exist_ok=True) uris = s3_list(s3_prefix) - json_uris = [u for u in uris if u.endswith(".json")] + json_uris = [ + u for u in uris + if u.endswith(".json") and not u.endswith("/consolidated.json") + ] print(f"Found {len(json_uris)} summary file(s) at {s3_prefix}") summaries = [] diff --git a/ci/utils/send_consolidated_summary.sh b/ci/utils/send_consolidated_summary.sh index 4f421678dc..32f9f8005b 100755 --- a/ci/utils/send_consolidated_summary.sh +++ b/ci/utils/send_consolidated_summary.sh @@ -2,21 +2,20 @@ # SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -# Send a single consolidated Slack notification for the entire nightly run. 
-# Reads the aggregated JSON produced by aggregate_nightly.py and sends a rich -# Slack message with: -# - Matrix grid overview (test_type x matrix → status) -# - Failure tables with :new: / :repeat: badges and matrix context -# - @channel on new genuine failures -# - Stabilized and flaky test summaries -# - Link to GitHub Actions run and consolidated HTML report +# Send a consolidated Slack notification for the entire nightly run. +# Reads the aggregated JSON produced by aggregate_nightly.py and sends +# chunked Slack messages: +# 1. Header + status summary + test totals +# 2. Matrix grid (passed / failed / flaky, chunked by test type) +# 3. Failure details (new, recurring, stabilized, flaky) +# 4. Links +# Then uploads the HTML report as a Slack file. # # Required environment variables: # SLACK_WEBHOOK_URL - Slack incoming webhook URL # CONSOLIDATED_SUMMARY - Path to consolidated_summary.json # # Optional environment variables: -# REPORT_URL - Link to the consolidated HTML report on S3 # CONSOLIDATED_HTML - Path to consolidated HTML file to upload to Slack # SLACK_BOT_TOKEN - Slack Bot Token (xoxb-*) for file uploads # SLACK_CHANNEL_ID - Slack channel ID for file uploads (required with bot token) @@ -25,7 +24,6 @@ set -euo pipefail CONSOLIDATED_SUMMARY="${CONSOLIDATED_SUMMARY:?CONSOLIDATED_SUMMARY must point to consolidated_summary.json}" SLACK_WEBHOOK_URL="${SLACK_WEBHOOK_URL:?SLACK_WEBHOOK_URL is required}" -REPORT_URL="${REPORT_URL:-}" CONSOLIDATED_HTML="${CONSOLIDATED_HTML:-}" SLACK_BOT_TOKEN="${SLACK_BOT_TOKEN:-}" SLACK_CHANNEL_ID="${SLACK_CHANNEL_ID:-}" @@ -35,10 +33,11 @@ if [ ! 
-f "${CONSOLIDATED_SUMMARY}" ]; then exit 1 fi -PAYLOAD=$(python3 - "${CONSOLIDATED_SUMMARY}" "${REPORT_URL}" <<'PYEOF' +# Generate chunked Slack payloads — one JSON object per line +PAYLOADS=$(python3 - "${CONSOLIDATED_SUMMARY}" <<'PYEOF' import json, sys -summary_path, report_url = sys.argv[1:3] +summary_path = sys.argv[1] with open(summary_path) as f: d = json.load(f) @@ -56,7 +55,25 @@ failed_jobs = jobs.get("failed", 0) flaky_jobs = jobs.get("flaky", 0) passed_jobs = jobs.get("passed", 0) -# --- Status line --- +status_icons = { + "passed": ":white_check_mark:", + "failed-new": ":rotating_light:", + "failed-recurring": ":x:", + "flaky": ":warning:", + "no-results": ":grey_question:", +} + +def make_payload(blocks): + return json.dumps({ + "username": "cuOpt Nightly Bot", + "icon_emoji": ":robot_face:", + "blocks": blocks, + }) + + +# ── Message 1: Header + status + totals ────────────────────────────── +blocks = [] + if failed_jobs > 0 and has_new: emoji = ":rotating_light:" text = f"NEW test failures in {failed_jobs} matrix job(s)" @@ -82,9 +99,6 @@ stats = ( f"Total: {totals.get('total', 0)}" ) -blocks = [] - -# Header blocks.append({ "type": "header", "text": { @@ -93,8 +107,6 @@ blocks.append({ "emoji": True, }, }) - -# Status summary blocks.append({ "type": "section", "text": { @@ -102,25 +114,18 @@ blocks.append({ "text": f"{mention}{emoji} *{text}*\n\n{stats}", }, }) +print(make_payload(blocks)) -blocks.append({"type": "divider"}) -# --- Matrix grid (compact) --- -# Group by test_type for readability +# ── Message 2: Matrix grid (chunked by test type) ──────────────────── test_types = {} for g in grid: tt = g["test_type"] test_types.setdefault(tt, []).append(g) -status_icons = { - "passed": ":white_check_mark:", - "failed-new": ":rotating_light:", - "failed-recurring": ":x:", - "flaky": ":warning:", - "no-results": ":grey_question:", -} - -grid_lines = [] +# Split into sections that fit within Slack's 3000 char limit per block +grid_blocks = [] 
+current_text = "" for tt, entries in sorted(test_types.items()): cells = [] for g in entries: @@ -131,154 +136,194 @@ for tt, entries in sorted(test_types.items()): cells.append(f"{icon} `{label}` ({failed_count} failures)") else: cells.append(f"{icon} `{label}`") - grid_lines.append(f"*{tt}*\n" + "\n".join(f" {c}" for c in cells)) - -# Slack blocks have a 3000 char limit per text field; truncate if needed -grid_text = "\n".join(grid_lines) -if len(grid_text) > 2900: - # Summarize instead of full grid - grid_text = ( - f"*Matrix Summary:* {passed_jobs} passed, {failed_jobs} failed, " - f"{flaky_jobs} flaky out of {total_jobs} jobs\n" - f"_(Full matrix in report link below)_" - ) + section = f"*{tt}*\n" + "\n".join(f" {c}" for c in cells) + "\n" + + # If adding this section would exceed limit, flush current block + if current_text and len(current_text) + len(section) > 2800: + grid_blocks.append({ + "type": "section", + "text": {"type": "mrkdwn", "text": current_text.rstrip()}, + }) + current_text = "" + current_text += section + +if current_text: + grid_blocks.append({ + "type": "section", + "text": {"type": "mrkdwn", "text": current_text.rstrip()}, + }) -blocks.append({ - "type": "section", - "text": {"type": "mrkdwn", "text": grid_text}, -}) +# Chunk grid blocks into messages of at most 48 blocks (leave room for divider) +for i in range(0, len(grid_blocks), 48): + chunk = grid_blocks[i:i+48] + print(make_payload([{"type": "divider"}] + chunk)) -# --- New failures (max 10 to avoid hitting Slack limits) --- + +# ── Message 3: Failure details ──────────────────────────────────────── +detail_blocks = [] + +# New failures new_failures = d.get("new_failures", []) if new_failures: - blocks.append({"type": "divider"}) lines = [] - for f_entry in new_failures[:10]: - msg = f_entry.get("message", "")[:50].replace("\n", " ") + for f_entry in new_failures[:15]: + msg = f_entry.get("message", "")[:80].replace("\n", " ") matrix = f_entry.get("matrix_label", "") lines.append( 
- f" :new: `{f_entry['name']}` ({f_entry['test_type']} / {matrix}) \u2014 {msg}" + f":new: `{f_entry['name']}` ({f_entry['test_type']} / {matrix})\n {msg}" ) - if len(new_failures) > 10: - lines.append(f" _...and {len(new_failures) - 10} more_") - blocks.append({ - "type": "section", - "text": {"type": "mrkdwn", "text": "*New Failures:*\n" + "\n".join(lines)}, - }) - -# --- Recurring failures (max 10) --- + if len(new_failures) > 15: + lines.append(f"_...and {len(new_failures) - 15} more_") + text = "*:rotating_light: New Failures:*\n" + "\n".join(lines) + # Split into 3000-char chunks if needed + while text: + detail_blocks.append({ + "type": "section", + "text": {"type": "mrkdwn", "text": text[:2900]}, + }) + text = text[2900:] + +# Recurring failures recurring = d.get("recurring_failures", []) if recurring: - blocks.append({"type": "divider"}) lines = [] - for f_entry in recurring[:10]: + for f_entry in recurring[:15]: matrix = f_entry.get("matrix_label", "") first = f_entry.get("first_seen", "?") lines.append( - f" :repeat: `{f_entry['name']}` ({f_entry['test_type']} / {matrix}) \u2014 since {first}" + f":repeat: `{f_entry['name']}` ({f_entry['test_type']} / {matrix}) \u2014 since {first}" ) - if len(recurring) > 10: - lines.append(f" _...and {len(recurring) - 10} more_") - blocks.append({ + if len(recurring) > 15: + lines.append(f"_...and {len(recurring) - 15} more_") + detail_blocks.append({"type": "divider"}) + detail_blocks.append({ "type": "section", - "text": {"type": "mrkdwn", "text": "*Recurring Failures:*\n" + "\n".join(lines)}, + "text": {"type": "mrkdwn", "text": "*:x: Recurring Failures:*\n" + "\n".join(lines)}, }) -# --- Stabilized --- +# Stabilized resolved = d.get("resolved_tests", []) if resolved: lines = [] - for r in resolved[:5]: + for r in resolved[:10]: matrix = r.get("matrix_label", "") count = r.get("failure_count", "?") lines.append( - f" :white_check_mark: `{r['name']}` ({r['test_type']} / {matrix}) \u2014 failed {count}x" + 
f":white_check_mark: `{r['name']}` ({r['test_type']} / {matrix}) \u2014 failed {count}x" ) - if len(resolved) > 5: - lines.append(f" _...and {len(resolved) - 5} more_") - blocks.append({ + if len(resolved) > 10: + lines.append(f"_...and {len(resolved) - 10} more_") + detail_blocks.append({"type": "divider"}) + detail_blocks.append({ "type": "section", "text": { "type": "mrkdwn", - "text": "*Stabilized (were failing, now pass):*\n" + "\n".join(lines), + "text": "*:white_check_mark: Stabilized (were failing, now pass):*\n" + "\n".join(lines), }, }) -# --- Flaky summary (count only to save space) --- +# Flaky summary flaky = d.get("flaky_tests", []) if flaky: - # Group by test name to show unique flaky tests unique_flaky = {} for f_entry in flaky: key = f_entry["name"] unique_flaky.setdefault(key, []).append(f_entry.get("matrix_label", "")) lines = [] - for name, matrices in sorted(unique_flaky.items())[:5]: + for name, matrices in sorted(unique_flaky.items())[:10]: matrix_str = ", ".join(matrices[:3]) if len(matrices) > 3: matrix_str += f" +{len(matrices)-3} more" - lines.append(f" :warning: `{name}` ({matrix_str})") - if len(unique_flaky) > 5: - lines.append(f" _...and {len(unique_flaky) - 5} more unique flaky tests_") - blocks.append({ + lines.append(f":warning: `{name}` ({matrix_str})") + if len(unique_flaky) > 10: + lines.append(f"_...and {len(unique_flaky) - 10} more unique flaky tests_") + detail_blocks.append({"type": "divider"}) + detail_blocks.append({ "type": "section", - "text": {"type": "mrkdwn", "text": "*Flaky Tests:*\n" + "\n".join(lines)}, + "text": {"type": "mrkdwn", "text": "*:warning: Flaky Tests:*\n" + "\n".join(lines)}, }) -# --- Links --- +if detail_blocks: + print(make_payload(detail_blocks)) + + +# ── Message 4: Links ───────────────────────────────────────────────── link_parts = [] if github_run_url: - link_parts.append(f"<{github_run_url}|GitHub Actions>") -if report_url: - link_parts.append(f"<{report_url}|Full Report>") -if link_parts: - 
blocks.append({"type": "divider"}) - blocks.append({ - "type": "context", - "elements": [{"type": "mrkdwn", "text": " ".join(link_parts)}], - }) + link_parts.append(f"<{github_run_url}|:github: GitHub Actions>") +link_parts.append("_Full report attached below_") -payload = { - "channel": "cuopt-regression-testing", - "username": "cuOpt Nightly Bot", - "icon_emoji": ":robot_face:", - "blocks": blocks, -} -print(json.dumps(payload)) +if link_parts: + print(make_payload([ + {"type": "divider"}, + {"type": "context", + "elements": [{"type": "mrkdwn", "text": " | ".join(link_parts)}]}, + ])) PYEOF ) echo "Sending consolidated Slack notification..." -curl -s -X POST \ - -H 'Content-type: application/json' \ - --data "${PAYLOAD}" \ - "${SLACK_WEBHOOK_URL}" - -echo "" +while IFS= read -r payload; do + response=$(curl -s -X POST \ + -H 'Content-type: application/json' \ + --data "${payload}" \ + "${SLACK_WEBHOOK_URL}") + if [ "${response}" != "ok" ]; then + echo "WARNING: Slack webhook returned: ${response}" >&2 + fi +done <<< "${PAYLOADS}" echo "Consolidated Slack notification sent." # Upload HTML report as a file to Slack (requires bot token) if [ -n "${SLACK_BOT_TOKEN}" ] && [ -n "${SLACK_CHANNEL_ID}" ] && [ -n "${CONSOLIDATED_HTML}" ] && [ -f "${CONSOLIDATED_HTML}" ]; then echo "Uploading HTML report to Slack..." 
- # Read date and branch from the summary for the filename REPORT_DATE=$(python3 -c "import json,sys; print(json.load(open(sys.argv[1])).get('date','report'))" "${CONSOLIDATED_SUMMARY}" 2>/dev/null || echo "report") REPORT_BRANCH=$(python3 -c "import json,sys; print(json.load(open(sys.argv[1])).get('branch','main'))" "${CONSOLIDATED_SUMMARY}" 2>/dev/null || echo "main") UPLOAD_FILENAME="cuopt-nightly-${REPORT_BRANCH}-${REPORT_DATE}.html" + FILE_SIZE=$(stat --format=%s "${CONSOLIDATED_HTML}") + UPLOAD_TITLE="cuOpt Nightly Report — ${REPORT_BRANCH} — ${REPORT_DATE}" - UPLOAD_RESPONSE=$(curl -s -X POST \ + # Step 1: Get an upload URL from Slack + URL_RESPONSE=$(curl -s -X POST \ -H "Authorization: Bearer ${SLACK_BOT_TOKEN}" \ - -F "channels=${SLACK_CHANNEL_ID}" \ - -F "file=@${CONSOLIDATED_HTML}" \ - -F "filename=${UPLOAD_FILENAME}" \ - -F "title=cuOpt Nightly Report — ${REPORT_BRANCH} — ${REPORT_DATE}" \ - -F "initial_comment=Full nightly test report attached. Download and open in a browser for interactive details." \ - "https://slack.com/api/files.upload") - - if echo "${UPLOAD_RESPONSE}" | python3 -c "import json,sys; sys.exit(0 if json.load(sys.stdin).get('ok') else 1)" 2>/dev/null; then - echo "HTML report uploaded to Slack." + -H "Content-Type: application/x-www-form-urlencoded" \ + --data-urlencode "filename=${UPLOAD_FILENAME}" \ + --data-urlencode "length=${FILE_SIZE}" \ + "https://slack.com/api/files.getUploadURLExternal") + + UPLOAD_URL=$(echo "${URL_RESPONSE}" | python3 -c "import json,sys; print(json.load(sys.stdin).get('upload_url',''))" 2>/dev/null) + FILE_ID=$(echo "${URL_RESPONSE}" | python3 -c "import json,sys; print(json.load(sys.stdin).get('file_id',''))" 2>/dev/null) + + if [ -z "${UPLOAD_URL}" ] || [ -z "${FILE_ID}" ]; then + echo "WARNING: Slack file upload failed at getUploadURLExternal. Response: ${URL_RESPONSE}" >&2 else - echo "WARNING: Slack file upload failed. 
Response: ${UPLOAD_RESPONSE}" >&2 + # Step 2: Upload the file content to the presigned URL + curl -s -X POST \ + -F "file=@${CONSOLIDATED_HTML}" \ + "${UPLOAD_URL}" + + # Step 3: Complete the upload and share to channel + COMPLETE_PAYLOAD=$(python3 -c " +import json, sys +print(json.dumps({ + 'files': [{'id': sys.argv[1], 'title': sys.argv[2]}], + 'channel_id': sys.argv[3], + 'initial_comment': 'Full nightly test report \u2014 download and open in a browser for interactive details.' +})) +" "${FILE_ID}" "${UPLOAD_TITLE}" "${SLACK_CHANNEL_ID}") + + COMPLETE_RESPONSE=$(curl -s -X POST \ + -H "Authorization: Bearer ${SLACK_BOT_TOKEN}" \ + -H "Content-Type: application/json" \ + --data "${COMPLETE_PAYLOAD}" \ + "https://slack.com/api/files.completeUploadExternal") + + if echo "${COMPLETE_RESPONSE}" | python3 -c "import json,sys; sys.exit(0 if json.load(sys.stdin).get('ok') else 1)" 2>/dev/null; then + echo "HTML report uploaded to Slack." + else + echo "WARNING: Slack file upload failed at completeUploadExternal. Response: ${COMPLETE_RESPONSE}" >&2 + fi fi else if [ -n "${SLACK_BOT_TOKEN}" ] && [ -z "${SLACK_CHANNEL_ID}" ]; then From a85e8a0180784596b46dcffa79147dfa1b48f289 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 14 Apr 2026 13:13:43 -0500 Subject: [PATCH 18/60] Add presigned URLs, workflow-level status, and show only failures in Slack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Generate presigned S3 URLs (7-day expiry) for consolidated HTML report and dashboard, linked in Slack messages - Query GitHub API for workflow job statuses to surface CI-level failures (notebooks, JuMP, etc.) 
that don't produce JUnit XML - Show only failed/flaky matrix entries in Slack instead of listing all passing ones — compact summary line for green runs - Pass GITHUB_TOKEN and GITHUB_RUN_ID to nightly-summary container - Remove temporary test workflow file --- .github/workflows/nightly-summary.yaml | 4 + ci/nightly_summary.sh | 26 ++++- ci/utils/aggregate_nightly.py | 54 ++++++++- ci/utils/send_consolidated_summary.sh | 150 +++++++++++++++++-------- 4 files changed, 185 insertions(+), 49 deletions(-) diff --git a/.github/workflows/nightly-summary.yaml b/.github/workflows/nightly-summary.yaml index 214fe84c97..c286466937 100644 --- a/.github/workflows/nightly-summary.yaml +++ b/.github/workflows/nightly-summary.yaml @@ -68,4 +68,8 @@ jobs: CUOPT_SLACK_CHANNEL_ID: ${{ secrets.CUOPT_SLACK_CHANNEL_ID }} RAPIDS_BUILD_TYPE: ${{ inputs.build_type }} RAPIDS_BRANCH: ${{ inputs.branch }} + GITHUB_TOKEN: ${{ github.token }} + GITHUB_RUN_ID: ${{ github.run_id }} + GITHUB_REPOSITORY: ${{ github.repository }} + GITHUB_SERVER_URL: ${{ github.server_url }} run: bash ci/nightly_summary.sh diff --git a/ci/nightly_summary.sh b/ci/nightly_summary.sh index c0aab7a52d..e23b11d1fc 100755 --- a/ci/nightly_summary.sh +++ b/ci/nightly_summary.sh @@ -13,6 +13,8 @@ # CUOPT_SLACK_WEBHOOK_URL - sends Slack if set # RAPIDS_BRANCH - branch name (default: main) # RAPIDS_BUILD_TYPE - build type (nightly, pull-request, etc.) +# GITHUB_TOKEN - for querying workflow job statuses +# GITHUB_RUN_ID - current workflow run ID set -euo pipefail @@ -40,6 +42,20 @@ S3_INDEX_URI="${S3_BASE}/index.json" S3_DASHBOARD_URI="${S3_BASE}/dashboard/index.html" DASHBOARD_DIR="${SCRIPT_DIR}/dashboard" +# --- Query GitHub API for workflow job statuses --- +WORKFLOW_JOBS_JSON="${OUTPUT_DIR}/workflow_jobs.json" +if [ -n "${GITHUB_TOKEN:-}" ] && [ -n "${GITHUB_RUN_ID:-}" ] && [ -n "${GITHUB_REPOSITORY:-}" ]; then + echo "Fetching workflow job statuses from GitHub API..." 
+ curl -s -L \ + -H "Authorization: Bearer ${GITHUB_TOKEN}" \ + -H "Accept: application/vnd.github+json" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/jobs?per_page=100" \ + > "${WORKFLOW_JOBS_JSON}" || echo "{}" > "${WORKFLOW_JOBS_JSON}" +else + echo "WARNING: GITHUB_TOKEN or GITHUB_RUN_ID not set, skipping workflow job status." >&2 + echo "{}" > "${WORKFLOW_JOBS_JSON}" +fi + echo "Aggregating nightly summaries from ${S3_SUMMARIES_PREFIX}" python3 "${SCRIPT_DIR}/utils/aggregate_nightly.py" \ @@ -53,7 +69,13 @@ python3 "${SCRIPT_DIR}/utils/aggregate_nightly.py" \ --output-dir "${OUTPUT_DIR}" \ --date "${RUN_DATE}" \ --branch "${BRANCH}" \ - --github-run-url "${GITHUB_RUN_URL}" + --github-run-url "${GITHUB_RUN_URL}" \ + --workflow-jobs "${WORKFLOW_JOBS_JSON}" + +# --- Generate presigned URLs for reports (7-day expiry) --- +PRESIGN_EXPIRY=604800 +PRESIGNED_HTML=$(aws s3 presign "${S3_CONSOLIDATED_HTML}" --expires-in "${PRESIGN_EXPIRY}" 2>/dev/null || echo "") +PRESIGNED_DASHBOARD=$(aws s3 presign "${S3_DASHBOARD_URI}" --expires-in "${PRESIGN_EXPIRY}" 2>/dev/null || echo "") # Send consolidated Slack notification if webhook is available and this is a nightly build if [ -n "${CUOPT_SLACK_WEBHOOK_URL:-}" ] && [ "${RAPIDS_BUILD_TYPE:-}" = "nightly" ]; then @@ -63,6 +85,8 @@ if [ -n "${CUOPT_SLACK_WEBHOOK_URL:-}" ] && [ "${RAPIDS_BUILD_TYPE:-}" = "nightl SLACK_WEBHOOK_URL="${CUOPT_SLACK_WEBHOOK_URL}" \ SLACK_BOT_TOKEN="${CUOPT_SLACK_BOT_TOKEN:-}" \ SLACK_CHANNEL_ID="${CUOPT_SLACK_CHANNEL_ID:-}" \ + PRESIGNED_REPORT_URL="${PRESIGNED_HTML}" \ + PRESIGNED_DASHBOARD_URL="${PRESIGNED_DASHBOARD}" \ bash "${SCRIPT_DIR}/utils/send_consolidated_summary.sh" fi diff --git a/ci/utils/aggregate_nightly.py b/ci/utils/aggregate_nightly.py index 31e567f487..78172ddbe5 100644 --- a/ci/utils/aggregate_nightly.py +++ b/ci/utils/aggregate_nightly.py @@ -172,7 +172,38 @@ def aggregate_summaries(summaries): # 
--------------------------------------------------------------------------- -def generate_consolidated_json(agg, date_str, branch, github_run_url=""): +def parse_workflow_jobs(workflow_jobs_path): + """Parse GitHub Actions workflow job statuses from JSON file. + Returns a list of dicts with job name, conclusion, and URL.""" + if not workflow_jobs_path or not Path(workflow_jobs_path).exists(): + return [] + try: + with open(workflow_jobs_path) as f: + data = json.load(f) + jobs_list = data.get("jobs", []) + result = [] + for job in jobs_list: + name = job.get("name", "") + # Skip the nightly-summary job itself + if "nightly-summary" in name.lower(): + continue + result.append({ + "name": name, + "conclusion": job.get("conclusion", "unknown"), + "status": job.get("status", "unknown"), + "url": job.get("html_url", ""), + }) + return result + except (json.JSONDecodeError, OSError) as exc: + print( + f"WARNING: Failed to parse workflow jobs: {exc}", + file=sys.stderr, + ) + return [] + + +def generate_consolidated_json(agg, date_str, branch, github_run_url="", + workflow_jobs=None): """Generate the consolidated JSON for Slack and dashboard.""" total_jobs = len(agg["matrix_grid"]) failed_jobs = sum( @@ -181,6 +212,10 @@ def generate_consolidated_json(agg, date_str, branch, github_run_url=""): flaky_jobs = sum(1 for g in agg["matrix_grid"] if g["status"] == "flaky") passed_jobs = sum(1 for g in agg["matrix_grid"] if g["status"] == "passed") + # Workflow-level CI job statuses (notebooks, JuMP, etc.) 
+ wf_jobs = workflow_jobs or [] + failed_ci_jobs = [j for j in wf_jobs if j["conclusion"] == "failure"] + return { "timestamp": datetime.now(timezone.utc).isoformat(), "date": date_str, @@ -199,6 +234,8 @@ def generate_consolidated_json(agg, date_str, branch, github_run_url=""): "recurring_failures": agg["all_recurring_failures"], "flaky_tests": agg["all_flaky_tests"], "resolved_tests": agg["all_resolved_tests"], + "workflow_jobs": wf_jobs, + "failed_ci_jobs": failed_ci_jobs, } @@ -556,6 +593,11 @@ def main(): default="", help="URL to the GitHub Actions run", ) + parser.add_argument( + "--workflow-jobs", + default="", + help="Path to JSON file with GitHub Actions workflow job statuses", + ) args = parser.parse_args() output_dir = Path(args.output_dir) @@ -591,12 +633,22 @@ def main(): f"{sum(1 for g in agg['matrix_grid'] if g['status'] == 'flaky')} flaky" ) + # ---- Step 2b: Parse workflow job statuses ---- + workflow_jobs = parse_workflow_jobs(args.workflow_jobs) + if workflow_jobs: + failed_wf = [j for j in workflow_jobs if j["conclusion"] == "failure"] + print( + f"Workflow jobs: {len(workflow_jobs)} total, " + f"{len(failed_wf)} failed" + ) + # ---- Step 3: Generate outputs ---- consolidated = generate_consolidated_json( agg, args.date, args.branch, args.github_run_url, + workflow_jobs, ) json_path = output_dir / "consolidated_summary.json" diff --git a/ci/utils/send_consolidated_summary.sh b/ci/utils/send_consolidated_summary.sh index 32f9f8005b..d302185f26 100755 --- a/ci/utils/send_consolidated_summary.sh +++ b/ci/utils/send_consolidated_summary.sh @@ -5,10 +5,10 @@ # Send a consolidated Slack notification for the entire nightly run. # Reads the aggregated JSON produced by aggregate_nightly.py and sends # chunked Slack messages: -# 1. Header + status summary + test totals -# 2. Matrix grid (passed / failed / flaky, chunked by test type) +# 1. Header + status summary + test totals + failed CI jobs +# 2. Failed/flaky matrix entries only (not passing ones) # 3. 
Failure details (new, recurring, stabilized, flaky) -# 4. Links +# 4. Links (presigned URLs + GitHub Actions) # Then uploads the HTML report as a Slack file. # # Required environment variables: @@ -16,9 +16,11 @@ # CONSOLIDATED_SUMMARY - Path to consolidated_summary.json # # Optional environment variables: -# CONSOLIDATED_HTML - Path to consolidated HTML file to upload to Slack -# SLACK_BOT_TOKEN - Slack Bot Token (xoxb-*) for file uploads -# SLACK_CHANNEL_ID - Slack channel ID for file uploads (required with bot token) +# CONSOLIDATED_HTML - Path to consolidated HTML file to upload +# SLACK_BOT_TOKEN - Slack Bot Token (xoxb-*) for file uploads +# SLACK_CHANNEL_ID - Slack channel ID for file uploads +# PRESIGNED_REPORT_URL - Presigned URL for consolidated HTML report +# PRESIGNED_DASHBOARD_URL - Presigned URL for dashboard set -euo pipefail @@ -27,6 +29,8 @@ SLACK_WEBHOOK_URL="${SLACK_WEBHOOK_URL:?SLACK_WEBHOOK_URL is required}" CONSOLIDATED_HTML="${CONSOLIDATED_HTML:-}" SLACK_BOT_TOKEN="${SLACK_BOT_TOKEN:-}" SLACK_CHANNEL_ID="${SLACK_CHANNEL_ID:-}" +PRESIGNED_REPORT_URL="${PRESIGNED_REPORT_URL:-}" +PRESIGNED_DASHBOARD_URL="${PRESIGNED_DASHBOARD_URL:-}" if [ ! -f "${CONSOLIDATED_SUMMARY}" ]; then echo "ERROR: Summary file not found: ${CONSOLIDATED_SUMMARY}" >&2 @@ -34,10 +38,12 @@ if [ ! 
-f "${CONSOLIDATED_SUMMARY}" ]; then fi # Generate chunked Slack payloads — one JSON object per line -PAYLOADS=$(python3 - "${CONSOLIDATED_SUMMARY}" <<'PYEOF' +PAYLOADS=$(python3 - "${CONSOLIDATED_SUMMARY}" "${PRESIGNED_REPORT_URL}" "${PRESIGNED_DASHBOARD_URL}" <<'PYEOF' import json, sys summary_path = sys.argv[1] +presigned_report_url = sys.argv[2] if len(sys.argv) > 2 else "" +presigned_dashboard_url = sys.argv[3] if len(sys.argv) > 3 else "" with open(summary_path) as f: d = json.load(f) @@ -49,12 +55,19 @@ jobs = d.get("job_summary", {}) totals = d.get("test_totals", {}) grid = d.get("matrix_grid", []) has_new = d.get("has_new_failures", False) +failed_ci_jobs = d.get("failed_ci_jobs", []) +workflow_jobs = d.get("workflow_jobs", []) total_jobs = jobs.get("total", 0) failed_jobs = jobs.get("failed", 0) flaky_jobs = jobs.get("flaky", 0) passed_jobs = jobs.get("passed", 0) +# Count CI-level failures (jobs that failed at workflow level) +total_ci_jobs = len(workflow_jobs) +failed_ci_count = len(failed_ci_jobs) +passed_ci_count = sum(1 for j in workflow_jobs if j["conclusion"] == "success") + status_icons = { "passed": ":white_check_mark:", "failed-new": ":rotating_light:", @@ -71,12 +84,22 @@ def make_payload(blocks): }) -# ── Message 1: Header + status + totals ────────────────────────────── +# ── Message 1: Header + status + totals + CI job failures ──────────── blocks = [] -if failed_jobs > 0 and has_new: +# Determine overall status considering both test results and CI jobs +all_green = failed_jobs == 0 and failed_ci_count == 0 + +if failed_ci_count > 0 or (failed_jobs > 0 and has_new): emoji = ":rotating_light:" - text = f"NEW test failures in {failed_jobs} matrix job(s)" + parts = [] + if failed_ci_count > 0: + parts.append(f"{failed_ci_count} CI job(s) failed") + if failed_jobs > 0 and has_new: + parts.append(f"NEW test failures in {failed_jobs} matrix job(s)") + elif failed_jobs > 0: + parts.append(f"recurring failures in {failed_jobs} matrix job(s)") + text 
= " + ".join(parts) mention = " " elif failed_jobs > 0: emoji = ":x:" @@ -89,6 +112,8 @@ elif flaky_jobs > 0: else: emoji = ":white_check_mark:" text = f"All {total_jobs} matrix jobs passed" + if total_ci_jobs > 0: + text += f", all {passed_ci_count} CI jobs succeeded" mention = "" stats = ( @@ -114,49 +139,76 @@ blocks.append({ "text": f"{mention}{emoji} *{text}*\n\n{stats}", }, }) -print(make_payload(blocks)) - -# ── Message 2: Matrix grid (chunked by test type) ──────────────────── -test_types = {} -for g in grid: - tt = g["test_type"] - test_types.setdefault(tt, []).append(g) - -# Split into sections that fit within Slack's 3000 char limit per block -grid_blocks = [] -current_text = "" -for tt, entries in sorted(test_types.items()): - cells = [] - for g in entries: - icon = status_icons.get(g["status"], ":grey_question:") - label = g["matrix_label"] - failed_count = g["counts"].get("failed", 0) - if failed_count > 0: - cells.append(f"{icon} `{label}` ({failed_count} failures)") +# Show failed CI jobs (notebooks, JuMP, etc.) 
+if failed_ci_jobs: + lines = [] + for j in failed_ci_jobs: + url = j.get("url", "") + name = j["name"] + if url: + lines.append(f":x: <{url}|{name}>") else: - cells.append(f"{icon} `{label}`") - section = f"*{tt}*\n" + "\n".join(f" {c}" for c in cells) + "\n" + lines.append(f":x: {name}") + blocks.append({"type": "divider"}) + blocks.append({ + "type": "section", + "text": {"type": "mrkdwn", "text": "*Failed CI Jobs:*\n" + "\n".join(lines)}, + }) - # If adding this section would exceed limit, flush current block - if current_text and len(current_text) + len(section) > 2800: +print(make_payload(blocks)) + + +# ── Message 2: Failed/flaky matrix entries only ────────────────────── +# Only show entries that are NOT passed +failed_grid = [g for g in grid if g["status"] != "passed"] + +if failed_grid: + test_types = {} + for g in failed_grid: + tt = g["test_type"] + test_types.setdefault(tt, []).append(g) + + grid_blocks = [] + current_text = "" + for tt, entries in sorted(test_types.items()): + cells = [] + for g in entries: + icon = status_icons.get(g["status"], ":grey_question:") + label = g["matrix_label"] + failed_count = g["counts"].get("failed", 0) + if failed_count > 0: + cells.append(f"{icon} `{label}` ({failed_count} failures)") + else: + cells.append(f"{icon} `{label}`") + section = f"*{tt}*\n" + "\n".join(f" {c}" for c in cells) + "\n" + + if current_text and len(current_text) + len(section) > 2800: + grid_blocks.append({ + "type": "section", + "text": {"type": "mrkdwn", "text": current_text.rstrip()}, + }) + current_text = "" + current_text += section + + if current_text: grid_blocks.append({ "type": "section", "text": {"type": "mrkdwn", "text": current_text.rstrip()}, }) - current_text = "" - current_text += section -if current_text: - grid_blocks.append({ - "type": "section", - "text": {"type": "mrkdwn", "text": current_text.rstrip()}, - }) - -# Chunk grid blocks into messages of at most 48 blocks (leave room for divider) -for i in range(0, 
len(grid_blocks), 48): - chunk = grid_blocks[i:i+48] - print(make_payload([{"type": "divider"}] + chunk)) + for i in range(0, len(grid_blocks), 48): + chunk = grid_blocks[i:i+48] + print(make_payload([{"type": "divider"}] + chunk)) +else: + # All passed — just a compact summary + if total_jobs > 0: + print(make_payload([ + {"type": "divider"}, + {"type": "section", + "text": {"type": "mrkdwn", + "text": f":white_check_mark: All {total_jobs} test matrix jobs passed"}}, + ])) # ── Message 3: Failure details ──────────────────────────────────────── @@ -175,7 +227,6 @@ if new_failures: if len(new_failures) > 15: lines.append(f"_...and {len(new_failures) - 15} more_") text = "*:rotating_light: New Failures:*\n" + "\n".join(lines) - # Split into 3000-char chunks if needed while text: detail_blocks.append({ "type": "section", @@ -251,7 +302,12 @@ if detail_blocks: link_parts = [] if github_run_url: link_parts.append(f"<{github_run_url}|:github: GitHub Actions>") -link_parts.append("_Full report attached below_") +if presigned_report_url: + link_parts.append(f"<{presigned_report_url}|:bar_chart: Full Report>") +if presigned_dashboard_url: + link_parts.append(f"<{presigned_dashboard_url}|:chart_with_upwards_trend: Dashboard>") +if not presigned_report_url: + link_parts.append("_Full report attached below_") if link_parts: print(make_payload([ From 3ee8c1fd1dd3f7d835e33f3bf038b91a2b2b24dc Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 14 Apr 2026 13:22:38 -0500 Subject: [PATCH 19/60] Use Slack threading for nightly summary details Post the main summary (status + links) as a top-level message via chat.postMessage, then post matrix details, failure breakdowns, and the HTML report as thread replies. Keeps the channel clean while preserving full detail in the thread. Falls back to webhook (no threading) if bot token is not available. 
--- ci/utils/send_consolidated_summary.sh | 175 ++++++++++++++++---------- 1 file changed, 109 insertions(+), 66 deletions(-) diff --git a/ci/utils/send_consolidated_summary.sh b/ci/utils/send_consolidated_summary.sh index d302185f26..c6fced3676 100755 --- a/ci/utils/send_consolidated_summary.sh +++ b/ci/utils/send_consolidated_summary.sh @@ -3,22 +3,21 @@ # SPDX-License-Identifier: Apache-2.0 # Send a consolidated Slack notification for the entire nightly run. -# Reads the aggregated JSON produced by aggregate_nightly.py and sends -# chunked Slack messages: -# 1. Header + status summary + test totals + failed CI jobs -# 2. Failed/flaky matrix entries only (not passing ones) -# 3. Failure details (new, recurring, stabilized, flaky) -# 4. Links (presigned URLs + GitHub Actions) -# Then uploads the HTML report as a Slack file. +# Reads the aggregated JSON produced by aggregate_nightly.py and sends: +# - Main message: Header + status summary + test totals + failed CI jobs +# - Thread replies: matrix details, failure details, links, HTML report +# +# If SLACK_BOT_TOKEN is available, posts via chat.postMessage (enables +# threading). Falls back to webhook (no threading) otherwise. # # Required environment variables: -# SLACK_WEBHOOK_URL - Slack incoming webhook URL +# SLACK_WEBHOOK_URL - Slack incoming webhook URL (fallback) # CONSOLIDATED_SUMMARY - Path to consolidated_summary.json # # Optional environment variables: # CONSOLIDATED_HTML - Path to consolidated HTML file to upload -# SLACK_BOT_TOKEN - Slack Bot Token (xoxb-*) for file uploads -# SLACK_CHANNEL_ID - Slack channel ID for file uploads +# SLACK_BOT_TOKEN - Slack Bot Token (xoxb-*) for threading + file uploads +# SLACK_CHANNEL_ID - Slack channel ID (required with bot token) # PRESIGNED_REPORT_URL - Presigned URL for consolidated HTML report # PRESIGNED_DASHBOARD_URL - Presigned URL for dashboard @@ -37,7 +36,8 @@ if [ ! 
-f "${CONSOLIDATED_SUMMARY}" ]; then exit 1 fi -# Generate chunked Slack payloads — one JSON object per line +# Generate Slack payloads — one JSON object per line. +# Line 1 = main message, lines 2+ = thread replies. PAYLOADS=$(python3 - "${CONSOLIDATED_SUMMARY}" "${PRESIGNED_REPORT_URL}" "${PRESIGNED_DASHBOARD_URL}" <<'PYEOF' import json, sys @@ -63,7 +63,6 @@ failed_jobs = jobs.get("failed", 0) flaky_jobs = jobs.get("flaky", 0) passed_jobs = jobs.get("passed", 0) -# Count CI-level failures (jobs that failed at workflow level) total_ci_jobs = len(workflow_jobs) failed_ci_count = len(failed_ci_jobs) passed_ci_count = sum(1 for j in workflow_jobs if j["conclusion"] == "success") @@ -84,12 +83,11 @@ def make_payload(blocks): }) -# ── Message 1: Header + status + totals + CI job failures ──────────── +# ══════════════════════════════════════════════════════════════════════ +# MAIN MESSAGE (line 1) — posted to channel, becomes thread parent +# ══════════════════════════════════════════════════════════════════════ blocks = [] -# Determine overall status considering both test results and CI jobs -all_green = failed_jobs == 0 and failed_ci_count == 0 - if failed_ci_count > 0 or (failed_jobs > 0 and has_new): emoji = ":rotating_light:" parts = [] @@ -140,7 +138,7 @@ blocks.append({ }, }) -# Show failed CI jobs (notebooks, JuMP, etc.) 
+# Failed CI jobs in main message if failed_ci_jobs: lines = [] for j in failed_ci_jobs: @@ -156,11 +154,29 @@ if failed_ci_jobs: "text": {"type": "mrkdwn", "text": "*Failed CI Jobs:*\n" + "\n".join(lines)}, }) +# Links in main message +link_parts = [] +if github_run_url: + link_parts.append(f"<{github_run_url}|:github: GitHub Actions>") +if presigned_report_url: + link_parts.append(f"<{presigned_report_url}|:bar_chart: Full Report>") +if presigned_dashboard_url: + link_parts.append(f"<{presigned_dashboard_url}|:chart_with_upwards_trend: Dashboard>") +if link_parts: + blocks.append({"type": "divider"}) + blocks.append({ + "type": "context", + "elements": [{"type": "mrkdwn", "text": " | ".join(link_parts)}], + }) + print(make_payload(blocks)) -# ── Message 2: Failed/flaky matrix entries only ────────────────────── -# Only show entries that are NOT passed +# ══════════════════════════════════════════════════════════════════════ +# THREAD REPLIES (lines 2+) — posted as replies to main message +# ══════════════════════════════════════════════════════════════════════ + +# ── Thread 1: Failed/flaky matrix entries ───────────────────────────── failed_grid = [g for g in grid if g["status"] != "passed"] if failed_grid: @@ -199,22 +215,11 @@ if failed_grid: for i in range(0, len(grid_blocks), 48): chunk = grid_blocks[i:i+48] - print(make_payload([{"type": "divider"}] + chunk)) -else: - # All passed — just a compact summary - if total_jobs > 0: - print(make_payload([ - {"type": "divider"}, - {"type": "section", - "text": {"type": "mrkdwn", - "text": f":white_check_mark: All {total_jobs} test matrix jobs passed"}}, - ])) - + print(make_payload(chunk)) -# ── Message 3: Failure details ──────────────────────────────────────── +# ── Thread 2: Failure details ───────────────────────────────────────── detail_blocks = [] -# New failures new_failures = d.get("new_failures", []) if new_failures: lines = [] @@ -234,7 +239,6 @@ if new_failures: }) text = text[2900:] -# Recurring 
failures recurring = d.get("recurring_failures", []) if recurring: lines = [] @@ -252,7 +256,6 @@ if recurring: "text": {"type": "mrkdwn", "text": "*:x: Recurring Failures:*\n" + "\n".join(lines)}, }) -# Stabilized resolved = d.get("resolved_tests", []) if resolved: lines = [] @@ -273,7 +276,6 @@ if resolved: }, }) -# Flaky summary flaky = d.get("flaky_tests", []) if flaky: unique_flaky = {} @@ -296,41 +298,78 @@ if flaky: if detail_blocks: print(make_payload(detail_blocks)) - - -# ── Message 4: Links ───────────────────────────────────────────────── -link_parts = [] -if github_run_url: - link_parts.append(f"<{github_run_url}|:github: GitHub Actions>") -if presigned_report_url: - link_parts.append(f"<{presigned_report_url}|:bar_chart: Full Report>") -if presigned_dashboard_url: - link_parts.append(f"<{presigned_dashboard_url}|:chart_with_upwards_trend: Dashboard>") -if not presigned_report_url: - link_parts.append("_Full report attached below_") - -if link_parts: - print(make_payload([ - {"type": "divider"}, - {"type": "context", - "elements": [{"type": "mrkdwn", "text": " | ".join(link_parts)}]}, - ])) PYEOF ) +# ── Send messages ───────────────────────────────────────────────────── echo "Sending consolidated Slack notification..." 
+ +THREAD_TS="" +FIRST=true + while IFS= read -r payload; do - response=$(curl -s -X POST \ - -H 'Content-type: application/json' \ - --data "${payload}" \ - "${SLACK_WEBHOOK_URL}") - if [ "${response}" != "ok" ]; then - echo "WARNING: Slack webhook returned: ${response}" >&2 + if [ "${FIRST}" = true ] && [ -n "${SLACK_BOT_TOKEN}" ] && [ -n "${SLACK_CHANNEL_ID}" ]; then + # Post main message via chat.postMessage to get thread_ts + BOT_PAYLOAD=$(python3 -c " +import json, sys +p = json.loads(sys.argv[1]) +p['channel'] = sys.argv[2] +print(json.dumps(p)) +" "${payload}" "${SLACK_CHANNEL_ID}") + + RESPONSE=$(curl -s -X POST \ + -H "Authorization: Bearer ${SLACK_BOT_TOKEN}" \ + -H "Content-Type: application/json" \ + --data "${BOT_PAYLOAD}" \ + "https://slack.com/api/chat.postMessage") + + THREAD_TS=$(echo "${RESPONSE}" | python3 -c "import json,sys; print(json.load(sys.stdin).get('ts',''))" 2>/dev/null || echo "") + OK=$(echo "${RESPONSE}" | python3 -c "import json,sys; print(json.load(sys.stdin).get('ok',''))" 2>/dev/null || echo "") + + if [ "${OK}" != "True" ]; then + echo "WARNING: chat.postMessage failed: ${RESPONSE}" >&2 + # Fall back to webhook for this and remaining messages + THREAD_TS="" + curl -s -X POST -H 'Content-type: application/json' --data "${payload}" "${SLACK_WEBHOOK_URL}" || true + else + echo "Main message posted (ts=${THREAD_TS})" + fi + FIRST=false + elif [ -n "${THREAD_TS}" ] && [ -n "${SLACK_BOT_TOKEN}" ] && [ -n "${SLACK_CHANNEL_ID}" ]; then + # Post thread reply via chat.postMessage + THREAD_PAYLOAD=$(python3 -c " +import json, sys +p = json.loads(sys.argv[1]) +p['channel'] = sys.argv[2] +p['thread_ts'] = sys.argv[3] +print(json.dumps(p)) +" "${payload}" "${SLACK_CHANNEL_ID}" "${THREAD_TS}") + + RESPONSE=$(curl -s -X POST \ + -H "Authorization: Bearer ${SLACK_BOT_TOKEN}" \ + -H "Content-Type: application/json" \ + --data "${THREAD_PAYLOAD}" \ + "https://slack.com/api/chat.postMessage") + + OK=$(echo "${RESPONSE}" | python3 -c "import 
json,sys; print(json.load(sys.stdin).get('ok',''))" 2>/dev/null || echo "") + if [ "${OK}" != "True" ]; then + echo "WARNING: Thread reply failed: ${RESPONSE}" >&2 + fi + else + # Fallback: webhook (no threading) + response=$(curl -s -X POST \ + -H 'Content-type: application/json' \ + --data "${payload}" \ + "${SLACK_WEBHOOK_URL}") + if [ "${response}" != "ok" ]; then + echo "WARNING: Slack webhook returned: ${response}" >&2 + fi + FIRST=false fi done <<< "${PAYLOADS}" echo "Consolidated Slack notification sent." -# Upload HTML report as a file to Slack (requires bot token) +# ── Upload HTML report as file in thread ────────────────────────────── if [ -n "${SLACK_BOT_TOKEN}" ] && [ -n "${SLACK_CHANNEL_ID}" ] && [ -n "${CONSOLIDATED_HTML}" ] && [ -f "${CONSOLIDATED_HTML}" ]; then echo "Uploading HTML report to Slack..." @@ -359,15 +398,19 @@ if [ -n "${SLACK_BOT_TOKEN}" ] && [ -n "${SLACK_CHANNEL_ID}" ] && [ -n "${CONSOL -F "file=@${CONSOLIDATED_HTML}" \ "${UPLOAD_URL}" - # Step 3: Complete the upload and share to channel + # Step 3: Complete the upload and share to channel (in thread if available) COMPLETE_PAYLOAD=$(python3 -c " import json, sys -print(json.dumps({ +payload = { 'files': [{'id': sys.argv[1], 'title': sys.argv[2]}], 'channel_id': sys.argv[3], - 'initial_comment': 'Full nightly test report \u2014 download and open in a browser for interactive details.' 
-})) -" "${FILE_ID}" "${UPLOAD_TITLE}" "${SLACK_CHANNEL_ID}") + 'initial_comment': 'Full nightly test report \u2014 download and open in a browser for interactive details.', +} +thread_ts = sys.argv[4] if len(sys.argv) > 4 and sys.argv[4] else '' +if thread_ts: + payload['thread_ts'] = thread_ts +print(json.dumps(payload)) +" "${FILE_ID}" "${UPLOAD_TITLE}" "${SLACK_CHANNEL_ID}" "${THREAD_TS}") COMPLETE_RESPONSE=$(curl -s -X POST \ -H "Authorization: Bearer ${SLACK_BOT_TOKEN}" \ From 4e8f5610193c8d2aad820a4d6db1e94747d61682 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 14 Apr 2026 13:40:11 -0500 Subject: [PATCH 20/60] Fix CI job filtering and Slack block size limits - Filter out per-matrix test jobs (conda-cpp-tests, conda-python-tests, wheel-tests-*) from workflow job status since they are already tracked by S3 summaries. Only surface untracked jobs like notebooks and JuMP. - Move full CI job failure list to thread reply to avoid exceeding Slack's 3000-char block limit. Main message shows compact summary. - Chunk CI job details into multiple blocks if needed. --- ci/utils/aggregate_nightly.py | 18 +++++++++++- ci/utils/send_consolidated_summary.sh | 40 +++++++++++++++++++-------- 2 files changed, 46 insertions(+), 12 deletions(-) diff --git a/ci/utils/aggregate_nightly.py b/ci/utils/aggregate_nightly.py index 78172ddbe5..7df91923d7 100644 --- a/ci/utils/aggregate_nightly.py +++ b/ci/utils/aggregate_nightly.py @@ -174,9 +174,22 @@ def aggregate_summaries(summaries): def parse_workflow_jobs(workflow_jobs_path): """Parse GitHub Actions workflow job statuses from JSON file. - Returns a list of dicts with job name, conclusion, and URL.""" + Returns a list of dicts with job name, conclusion, and URL. 
+ Only includes jobs NOT already tracked by per-matrix S3 summaries + (i.e., excludes conda-cpp-tests, conda-python-tests, + wheel-tests-cuopt, wheel-tests-cuopt-server matrix jobs and + their compute-matrix helpers).""" if not workflow_jobs_path or not Path(workflow_jobs_path).exists(): return [] + + # Job name prefixes that are already covered by per-matrix S3 reports + TRACKED_PREFIXES = ( + "conda-cpp-tests", + "conda-python-tests", + "wheel-tests-cuopt-server", + "wheel-tests-cuopt", + ) + try: with open(workflow_jobs_path) as f: data = json.load(f) @@ -187,6 +200,9 @@ def parse_workflow_jobs(workflow_jobs_path): # Skip the nightly-summary job itself if "nightly-summary" in name.lower(): continue + # Skip jobs already tracked by per-matrix S3 summaries + if any(name.startswith(prefix) for prefix in TRACKED_PREFIXES): + continue result.append({ "name": name, "conclusion": job.get("conclusion", "unknown"), diff --git a/ci/utils/send_consolidated_summary.sh b/ci/utils/send_consolidated_summary.sh index c6fced3676..609a0dcbc5 100755 --- a/ci/utils/send_consolidated_summary.sh +++ b/ci/utils/send_consolidated_summary.sh @@ -138,20 +138,16 @@ blocks.append({ }, }) -# Failed CI jobs in main message +# Failed CI jobs summary in main message (details in thread) if failed_ci_jobs: - lines = [] - for j in failed_ci_jobs: - url = j.get("url", "") - name = j["name"] - if url: - lines.append(f":x: <{url}|{name}>") - else: - lines.append(f":x: {name}") + names = [j["name"] for j in failed_ci_jobs] + summary = f":x: *{len(failed_ci_jobs)} CI job(s) failed:* " + ", ".join(f"`{n}`" for n in names[:5]) + if len(names) > 5: + summary += f" _+{len(names) - 5} more_" blocks.append({"type": "divider"}) blocks.append({ "type": "section", - "text": {"type": "mrkdwn", "text": "*Failed CI Jobs:*\n" + "\n".join(lines)}, + "text": {"type": "mrkdwn", "text": summary}, }) # Links in main message @@ -176,7 +172,29 @@ print(make_payload(blocks)) # THREAD REPLIES (lines 2+) — posted as replies 
to main message # ══════════════════════════════════════════════════════════════════════ -# ── Thread 1: Failed/flaky matrix entries ───────────────────────────── +# ── Thread 1: Failed CI job details ─────────────────────────────────── +if failed_ci_jobs: + ci_blocks = [] + current = "*Failed CI Jobs:*\n" + for j in failed_ci_jobs: + url = j.get("url", "") + name = j["name"] + line = f":x: <{url}|{name}>\n" if url else f":x: {name}\n" + if len(current) + len(line) > 2900: + ci_blocks.append({ + "type": "section", + "text": {"type": "mrkdwn", "text": current.rstrip()}, + }) + current = "" + current += line + if current.strip(): + ci_blocks.append({ + "type": "section", + "text": {"type": "mrkdwn", "text": current.rstrip()}, + }) + print(make_payload(ci_blocks)) + +# ── Thread 2: Failed/flaky matrix entries ───────────────────────────── failed_grid = [g for g in grid if g["status"] != "passed"] if failed_grid: From ac91a7452e6d18f869ab0d5ed43e31defc4abb7c Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 14 Apr 2026 13:45:51 -0500 Subject: [PATCH 21/60] Show all CI workflow statuses and group test issues by workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Include ALL workflow jobs in consolidated JSON (not just untracked) with has_test_details flag to distinguish tracked vs untracked - Thread reply 1: CI Workflow Status showing every workflow group with pass/fail counts — new workflows automatically visible - Thread reply 2: Failing and flaky tests grouped by workflow so users see which workflow has which test issues - Main message alerts only on untracked CI failures (notebooks, JuMP) since tracked failures already appear in the matrix test grid --- ci/utils/aggregate_nightly.py | 25 ++- ci/utils/send_consolidated_summary.sh | 237 ++++++++++++-------------- 2 files changed, 122 insertions(+), 140 deletions(-) diff --git a/ci/utils/aggregate_nightly.py b/ci/utils/aggregate_nightly.py index 
7df91923d7..a0d6db4c90 100644 --- a/ci/utils/aggregate_nightly.py +++ b/ci/utils/aggregate_nightly.py @@ -174,15 +174,15 @@ def aggregate_summaries(summaries): def parse_workflow_jobs(workflow_jobs_path): """Parse GitHub Actions workflow job statuses from JSON file. - Returns a list of dicts with job name, conclusion, and URL. - Only includes jobs NOT already tracked by per-matrix S3 summaries - (i.e., excludes conda-cpp-tests, conda-python-tests, - wheel-tests-cuopt, wheel-tests-cuopt-server matrix jobs and - their compute-matrix helpers).""" + Returns all jobs (except nightly-summary itself) with name, + conclusion, URL, and whether they are tracked by per-matrix + S3 summaries.""" if not workflow_jobs_path or not Path(workflow_jobs_path).exists(): return [] - # Job name prefixes that are already covered by per-matrix S3 reports + # Job name prefixes that are covered by per-matrix S3 reports. + # These jobs also have detailed test results; other jobs only have + # a pass/fail status at the workflow level. TRACKED_PREFIXES = ( "conda-cpp-tests", "conda-python-tests", @@ -200,14 +200,16 @@ def parse_workflow_jobs(workflow_jobs_path): # Skip the nightly-summary job itself if "nightly-summary" in name.lower(): continue - # Skip jobs already tracked by per-matrix S3 summaries - if any(name.startswith(prefix) for prefix in TRACKED_PREFIXES): + # Skip helper jobs (compute-matrix, etc.) 
+ if "compute-matrix" in name.lower(): continue + tracked = any(name.startswith(p) for p in TRACKED_PREFIXES) result.append({ "name": name, "conclusion": job.get("conclusion", "unknown"), "status": job.get("status", "unknown"), "url": job.get("html_url", ""), + "has_test_details": tracked, }) return result except (json.JSONDecodeError, OSError) as exc: @@ -228,9 +230,13 @@ def generate_consolidated_json(agg, date_str, branch, github_run_url="", flaky_jobs = sum(1 for g in agg["matrix_grid"] if g["status"] == "flaky") passed_jobs = sum(1 for g in agg["matrix_grid"] if g["status"] == "passed") - # Workflow-level CI job statuses (notebooks, JuMP, etc.) + # Workflow-level CI job statuses wf_jobs = workflow_jobs or [] failed_ci_jobs = [j for j in wf_jobs if j["conclusion"] == "failure"] + # Jobs without per-matrix S3 tracking (notebooks, JuMP, etc.) + untracked_failed = [ + j for j in failed_ci_jobs if not j.get("has_test_details", False) + ] return { "timestamp": datetime.now(timezone.utc).isoformat(), @@ -252,6 +258,7 @@ def generate_consolidated_json(agg, date_str, branch, github_run_url="", "resolved_tests": agg["all_resolved_tests"], "workflow_jobs": wf_jobs, "failed_ci_jobs": failed_ci_jobs, + "untracked_failed_ci_jobs": untracked_failed, } diff --git a/ci/utils/send_consolidated_summary.sh b/ci/utils/send_consolidated_summary.sh index 609a0dcbc5..d6466d1bd0 100755 --- a/ci/utils/send_consolidated_summary.sh +++ b/ci/utils/send_consolidated_summary.sh @@ -56,6 +56,7 @@ totals = d.get("test_totals", {}) grid = d.get("matrix_grid", []) has_new = d.get("has_new_failures", False) failed_ci_jobs = d.get("failed_ci_jobs", []) +untracked_failed = d.get("untracked_failed_ci_jobs", []) workflow_jobs = d.get("workflow_jobs", []) total_jobs = jobs.get("total", 0) @@ -88,11 +89,13 @@ def make_payload(blocks): # ══════════════════════════════════════════════════════════════════════ blocks = [] -if failed_ci_count > 0 or (failed_jobs > 0 and has_new): +untracked_count = 
len(untracked_failed) +if untracked_count > 0 or (failed_jobs > 0 and has_new): emoji = ":rotating_light:" parts = [] - if failed_ci_count > 0: - parts.append(f"{failed_ci_count} CI job(s) failed") + if untracked_count > 0: + names = [j["name"] for j in untracked_failed] + parts.append(f"{untracked_count} CI job(s) failed ({', '.join(names[:3])})") if failed_jobs > 0 and has_new: parts.append(f"NEW test failures in {failed_jobs} matrix job(s)") elif failed_jobs > 0: @@ -138,10 +141,10 @@ blocks.append({ }, }) -# Failed CI jobs summary in main message (details in thread) -if failed_ci_jobs: - names = [j["name"] for j in failed_ci_jobs] - summary = f":x: *{len(failed_ci_jobs)} CI job(s) failed:* " + ", ".join(f"`{n}`" for n in names[:5]) +# Failed untracked CI jobs in main message (details in thread) +if untracked_failed: + names = [j["name"] for j in untracked_failed] + summary = f":x: *{len(untracked_failed)} CI job(s) failed:* " + ", ".join(f"`{n}`" for n in names[:5]) if len(names) > 5: summary += f" _+{len(names) - 5} more_" blocks.append({"type": "divider"}) @@ -172,14 +175,37 @@ print(make_payload(blocks)) # THREAD REPLIES (lines 2+) — posted as replies to main message # ══════════════════════════════════════════════════════════════════════ -# ── Thread 1: Failed CI job details ─────────────────────────────────── -if failed_ci_jobs: +# ── Thread 1: CI Workflow Status (all jobs) ─────────────────────────── +# Shows every workflow job so new workflows are automatically visible. 
+if workflow_jobs: + ci_icons = {"success": ":white_check_mark:", "failure": ":x:", + "cancelled": ":no_entry_sign:", "skipped": ":fast_forward:"} + + # Group by workflow prefix (e.g., "conda-cpp-tests", "conda-notebook-tests") + wf_groups = {} + for j in workflow_jobs: + # Use the part before " / " as group name, or full name + prefix = j["name"].split(" / ")[0] if " / " in j["name"] else j["name"] + wf_groups.setdefault(prefix, []).append(j) + ci_blocks = [] - current = "*Failed CI Jobs:*\n" - for j in failed_ci_jobs: - url = j.get("url", "") - name = j["name"] - line = f":x: <{url}|{name}>\n" if url else f":x: {name}\n" + current = "*CI Workflow Status:*\n" + for group_name, group_jobs in sorted(wf_groups.items()): + passed = sum(1 for j in group_jobs if j["conclusion"] == "success") + failed = sum(1 for j in group_jobs if j["conclusion"] == "failure") + total = len(group_jobs) + + if failed > 0: + icon = ":x:" + detail = f"{failed}/{total} failed" + elif passed == total: + icon = ":white_check_mark:" + detail = f"{total} passed" + else: + icon = ":grey_question:" + detail = f"{passed}/{total} passed" + + line = f"{icon} *{group_name}* — {detail}\n" if len(current) + len(line) > 2900: ci_blocks.append({ "type": "section", @@ -187,6 +213,7 @@ if failed_ci_jobs: }) current = "" current += line + if current.strip(): ci_blocks.append({ "type": "section", @@ -194,128 +221,76 @@ if failed_ci_jobs: }) print(make_payload(ci_blocks)) -# ── Thread 2: Failed/flaky matrix entries ───────────────────────────── -failed_grid = [g for g in grid if g["status"] != "passed"] - -if failed_grid: - test_types = {} - for g in failed_grid: - tt = g["test_type"] - test_types.setdefault(tt, []).append(g) - - grid_blocks = [] - current_text = "" - for tt, entries in sorted(test_types.items()): - cells = [] - for g in entries: - icon = status_icons.get(g["status"], ":grey_question:") - label = g["matrix_label"] - failed_count = g["counts"].get("failed", 0) - if failed_count > 0: - 
cells.append(f"{icon} `{label}` ({failed_count} failures)") - else: - cells.append(f"{icon} `{label}`") - section = f"*{tt}*\n" + "\n".join(f" {c}" for c in cells) + "\n" - - if current_text and len(current_text) + len(section) > 2800: - grid_blocks.append({ - "type": "section", - "text": {"type": "mrkdwn", "text": current_text.rstrip()}, - }) - current_text = "" - current_text += section - - if current_text: - grid_blocks.append({ - "type": "section", - "text": {"type": "mrkdwn", "text": current_text.rstrip()}, - }) - - for i in range(0, len(grid_blocks), 48): - chunk = grid_blocks[i:i+48] - print(make_payload(chunk)) - -# ── Thread 2: Failure details ───────────────────────────────────────── -detail_blocks = [] - +# ── Thread 2: Failing and flaky tests (grouped by workflow) ─────────── +# Build per-workflow test issue lists new_failures = d.get("new_failures", []) -if new_failures: - lines = [] - for f_entry in new_failures[:15]: - msg = f_entry.get("message", "")[:80].replace("\n", " ") - matrix = f_entry.get("matrix_label", "") - lines.append( - f":new: `{f_entry['name']}` ({f_entry['test_type']} / {matrix})\n {msg}" - ) - if len(new_failures) > 15: - lines.append(f"_...and {len(new_failures) - 15} more_") - text = "*:rotating_light: New Failures:*\n" + "\n".join(lines) - while text: - detail_blocks.append({ - "type": "section", - "text": {"type": "mrkdwn", "text": text[:2900]}, - }) - text = text[2900:] - recurring = d.get("recurring_failures", []) -if recurring: - lines = [] - for f_entry in recurring[:15]: - matrix = f_entry.get("matrix_label", "") - first = f_entry.get("first_seen", "?") - lines.append( - f":repeat: `{f_entry['name']}` ({f_entry['test_type']} / {matrix}) \u2014 since {first}" - ) - if len(recurring) > 15: - lines.append(f"_...and {len(recurring) - 15} more_") - detail_blocks.append({"type": "divider"}) - detail_blocks.append({ - "type": "section", - "text": {"type": "mrkdwn", "text": "*:x: Recurring Failures:*\n" + "\n".join(lines)}, - }) - 
+flaky = d.get("flaky_tests", []) resolved = d.get("resolved_tests", []) -if resolved: - lines = [] - for r in resolved[:10]: - matrix = r.get("matrix_label", "") - count = r.get("failure_count", "?") - lines.append( - f":white_check_mark: `{r['name']}` ({r['test_type']} / {matrix}) \u2014 failed {count}x" - ) - if len(resolved) > 10: - lines.append(f"_...and {len(resolved) - 10} more_") - detail_blocks.append({"type": "divider"}) - detail_blocks.append({ - "type": "section", - "text": { - "type": "mrkdwn", - "text": "*:white_check_mark: Stabilized (were failing, now pass):*\n" + "\n".join(lines), - }, - }) -flaky = d.get("flaky_tests", []) -if flaky: - unique_flaky = {} - for f_entry in flaky: - key = f_entry["name"] - unique_flaky.setdefault(key, []).append(f_entry.get("matrix_label", "")) - lines = [] - for name, matrices in sorted(unique_flaky.items())[:10]: - matrix_str = ", ".join(matrices[:3]) - if len(matrices) > 3: - matrix_str += f" +{len(matrices)-3} more" - lines.append(f":warning: `{name}` ({matrix_str})") - if len(unique_flaky) > 10: - lines.append(f"_...and {len(unique_flaky) - 10} more unique flaky tests_") - detail_blocks.append({"type": "divider"}) - detail_blocks.append({ - "type": "section", - "text": {"type": "mrkdwn", "text": "*:warning: Flaky Tests:*\n" + "\n".join(lines)}, - }) +# Collect all test issues by test_type (workflow) +issues_by_wf = {} +for f_entry in new_failures: + tt = f_entry.get("test_type", "unknown") + issues_by_wf.setdefault(tt, {"new": [], "recurring": [], "flaky": [], "resolved": []}) + issues_by_wf[tt]["new"].append(f_entry) +for f_entry in recurring: + tt = f_entry.get("test_type", "unknown") + issues_by_wf.setdefault(tt, {"new": [], "recurring": [], "flaky": [], "resolved": []}) + issues_by_wf[tt]["recurring"].append(f_entry) +for f_entry in flaky: + tt = f_entry.get("test_type", "unknown") + issues_by_wf.setdefault(tt, {"new": [], "recurring": [], "flaky": [], "resolved": []}) + 
issues_by_wf[tt]["flaky"].append(f_entry) +for r in resolved: + tt = r.get("test_type", "unknown") + issues_by_wf.setdefault(tt, {"new": [], "recurring": [], "flaky": [], "resolved": []}) + issues_by_wf[tt]["resolved"].append(r) + +if issues_by_wf: + for wf_name, issues in sorted(issues_by_wf.items()): + wf_blocks = [] + wf_text = f"*{wf_name}*\n" + + # New failures + for f_entry in issues["new"][:10]: + msg = f_entry.get("message", "")[:60].replace("\n", " ") + matrix = f_entry.get("matrix_label", "") + wf_text += f":new: `{f_entry['name']}` ({matrix}) — {msg}\n" + + # Recurring failures + for f_entry in issues["recurring"][:10]: + matrix = f_entry.get("matrix_label", "") + first = f_entry.get("first_seen", "?") + wf_text += f":repeat: `{f_entry['name']}` ({matrix}) — since {first}\n" + + # Flaky + for f_entry in issues["flaky"][:10]: + matrix = f_entry.get("matrix_label", "") + wf_text += f":warning: `{f_entry['name']}` ({matrix})\n" + + # Resolved + for r in issues["resolved"][:5]: + matrix = r.get("matrix_label", "") + count = r.get("failure_count", "?") + wf_text += f":white_check_mark: `{r['name']}` ({matrix}) — was failing {count}x\n" + + # Truncation notes + for category, label, limit in [("new", "new failures", 10), ("recurring", "recurring", 10), + ("flaky", "flaky", 10), ("resolved", "resolved", 5)]: + if len(issues[category]) > limit: + wf_text += f"_...+{len(issues[category]) - limit} more {label}_\n" + + # Chunk if needed + while wf_text: + chunk = wf_text[:2900] + wf_blocks.append({ + "type": "section", + "text": {"type": "mrkdwn", "text": chunk.rstrip()}, + }) + wf_text = wf_text[2900:] -if detail_blocks: - print(make_payload(detail_blocks)) + print(make_payload(wf_blocks)) PYEOF ) From 777a58dd7e01d3157676d07b030809b85ee6f7cc Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 14 Apr 2026 14:02:15 -0500 Subject: [PATCH 22/60] Remove @channel ping from Slack notifications --- ci/utils/send_consolidated_summary.sh | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/ci/utils/send_consolidated_summary.sh b/ci/utils/send_consolidated_summary.sh index d6466d1bd0..813bf3fd29 100755 --- a/ci/utils/send_consolidated_summary.sh +++ b/ci/utils/send_consolidated_summary.sh @@ -101,7 +101,7 @@ if untracked_count > 0 or (failed_jobs > 0 and has_new): elif failed_jobs > 0: parts.append(f"recurring failures in {failed_jobs} matrix job(s)") text = " + ".join(parts) - mention = " " + mention = "" elif failed_jobs > 0: emoji = ":x:" text = f"Recurring failures in {failed_jobs} matrix job(s)" From 9c34a9e31f0432e22c9fb1ad15cc05ab91a77ae9 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 14 Apr 2026 16:26:11 -0500 Subject: [PATCH 23/60] Fix presigned URLs and reorder Slack thread replies - Map CUOPT_AWS_* to standard AWS env vars before aws s3 presign so the CLI has credentials in the container - Log presign failures instead of swallowing them silently - Reorder thread: test failures/details first, CI workflow overview last --- ci/nightly_summary.sh | 15 ++++- ci/utils/send_consolidated_summary.sh | 89 +++++++++++++-------------- 2 files changed, 55 insertions(+), 49 deletions(-) diff --git a/ci/nightly_summary.sh b/ci/nightly_summary.sh index e23b11d1fc..647d08260a 100755 --- a/ci/nightly_summary.sh +++ b/ci/nightly_summary.sh @@ -73,9 +73,20 @@ python3 "${SCRIPT_DIR}/utils/aggregate_nightly.py" \ --workflow-jobs "${WORKFLOW_JOBS_JSON}" # --- Generate presigned URLs for reports (7-day expiry) --- +# Map CUOPT_AWS_* to standard AWS env vars for the aws CLI +export AWS_ACCESS_KEY_ID="${CUOPT_AWS_ACCESS_KEY_ID:-${AWS_ACCESS_KEY_ID:-}}" +export AWS_SECRET_ACCESS_KEY="${CUOPT_AWS_SECRET_ACCESS_KEY:-${AWS_SECRET_ACCESS_KEY:-}}" +unset AWS_SESSION_TOKEN + PRESIGN_EXPIRY=604800 -PRESIGNED_HTML=$(aws s3 presign "${S3_CONSOLIDATED_HTML}" --expires-in "${PRESIGN_EXPIRY}" 2>/dev/null || echo "") -PRESIGNED_DASHBOARD=$(aws s3 presign "${S3_DASHBOARD_URI}" --expires-in "${PRESIGN_EXPIRY}" 2>/dev/null || 
echo "") +PRESIGNED_HTML=$(aws s3 presign "${S3_CONSOLIDATED_HTML}" --expires-in "${PRESIGN_EXPIRY}" 2>&1) || { + echo "WARNING: Failed to generate presigned URL for report: ${PRESIGNED_HTML}" >&2 + PRESIGNED_HTML="" +} +PRESIGNED_DASHBOARD=$(aws s3 presign "${S3_DASHBOARD_URI}" --expires-in "${PRESIGN_EXPIRY}" 2>&1) || { + echo "WARNING: Failed to generate presigned URL for dashboard: ${PRESIGNED_DASHBOARD}" >&2 + PRESIGNED_DASHBOARD="" +} # Send consolidated Slack notification if webhook is available and this is a nightly build if [ -n "${CUOPT_SLACK_WEBHOOK_URL:-}" ] && [ "${RAPIDS_BUILD_TYPE:-}" = "nightly" ]; then diff --git a/ci/utils/send_consolidated_summary.sh b/ci/utils/send_consolidated_summary.sh index 813bf3fd29..041a3de0e6 100755 --- a/ci/utils/send_consolidated_summary.sh +++ b/ci/utils/send_consolidated_summary.sh @@ -175,53 +175,7 @@ print(make_payload(blocks)) # THREAD REPLIES (lines 2+) — posted as replies to main message # ══════════════════════════════════════════════════════════════════════ -# ── Thread 1: CI Workflow Status (all jobs) ─────────────────────────── -# Shows every workflow job so new workflows are automatically visible. 
-if workflow_jobs: - ci_icons = {"success": ":white_check_mark:", "failure": ":x:", - "cancelled": ":no_entry_sign:", "skipped": ":fast_forward:"} - - # Group by workflow prefix (e.g., "conda-cpp-tests", "conda-notebook-tests") - wf_groups = {} - for j in workflow_jobs: - # Use the part before " / " as group name, or full name - prefix = j["name"].split(" / ")[0] if " / " in j["name"] else j["name"] - wf_groups.setdefault(prefix, []).append(j) - - ci_blocks = [] - current = "*CI Workflow Status:*\n" - for group_name, group_jobs in sorted(wf_groups.items()): - passed = sum(1 for j in group_jobs if j["conclusion"] == "success") - failed = sum(1 for j in group_jobs if j["conclusion"] == "failure") - total = len(group_jobs) - - if failed > 0: - icon = ":x:" - detail = f"{failed}/{total} failed" - elif passed == total: - icon = ":white_check_mark:" - detail = f"{total} passed" - else: - icon = ":grey_question:" - detail = f"{passed}/{total} passed" - - line = f"{icon} *{group_name}* — {detail}\n" - if len(current) + len(line) > 2900: - ci_blocks.append({ - "type": "section", - "text": {"type": "mrkdwn", "text": current.rstrip()}, - }) - current = "" - current += line - - if current.strip(): - ci_blocks.append({ - "type": "section", - "text": {"type": "mrkdwn", "text": current.rstrip()}, - }) - print(make_payload(ci_blocks)) - -# ── Thread 2: Failing and flaky tests (grouped by workflow) ─────────── +# ── Thread 1: Failing and flaky tests (grouped by workflow) ─────────── # Build per-workflow test issue lists new_failures = d.get("new_failures", []) recurring = d.get("recurring_failures", []) @@ -291,6 +245,47 @@ if issues_by_wf: wf_text = wf_text[2900:] print(make_payload(wf_blocks)) + +# ── Thread 2: CI Workflow Status (all jobs) ─────────────────────────── +# Shows every workflow job so new workflows are automatically visible. 
+if workflow_jobs: + wf_groups = {} + for j in workflow_jobs: + prefix = j["name"].split(" / ")[0] if " / " in j["name"] else j["name"] + wf_groups.setdefault(prefix, []).append(j) + + ci_blocks = [] + current = "*CI Workflow Status:*\n" + for group_name, group_jobs in sorted(wf_groups.items()): + passed = sum(1 for j in group_jobs if j["conclusion"] == "success") + failed = sum(1 for j in group_jobs if j["conclusion"] == "failure") + total = len(group_jobs) + + if failed > 0: + icon = ":x:" + detail = f"{failed}/{total} failed" + elif passed == total: + icon = ":white_check_mark:" + detail = f"{total} passed" + else: + icon = ":grey_question:" + detail = f"{passed}/{total} passed" + + line = f"{icon} *{group_name}* — {detail}\n" + if len(current) + len(line) > 2900: + ci_blocks.append({ + "type": "section", + "text": {"type": "mrkdwn", "text": current.rstrip()}, + }) + current = "" + current += line + + if current.strip(): + ci_blocks.append({ + "type": "section", + "text": {"type": "mrkdwn", "text": current.rstrip()}, + }) + print(make_payload(ci_blocks)) PYEOF ) From 8712ec73883f55ba8ebead5a51a85cdb94761926 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 14 Apr 2026 16:43:19 -0500 Subject: [PATCH 24/60] Name failing workflows in Slack and add build summary - Main message now lists which workflows have failures by name (e.g., "Failures in: conda-notebook-tests, wheel-tests-cuopt") with per-workflow failure counts - Add build-summary job to build.yaml that sends a Slack message after all builds complete, showing pass/fail per build job - Build summary queries GitHub API for job statuses, grouped by workflow prefix (cpp-build, wheel-build-cuopt, docs-build, etc.) 
--- .github/workflows/build.yaml | 38 +++++++ ci/build_summary.sh | 152 ++++++++++++++++++++++++++ ci/utils/send_consolidated_summary.sh | 63 +++++++---- 3 files changed, 233 insertions(+), 20 deletions(-) create mode 100755 ci/build_summary.sh diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index a945cde8ec..910f469936 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -242,6 +242,44 @@ jobs: -f date="${INPUT_DATE}" \ -f sha="${INPUT_SHA}" + build-summary: + if: ${{ always() && (inputs.build_type == 'nightly') }} + needs: + - cpp-build + - python-build + - upload-conda + - wheel-build-cuopt-mps-parser + - wheel-publish-cuopt-mps-parser + - wheel-build-libcuopt + - wheel-publish-libcuopt + - wheel-build-cuopt + - wheel-publish-cuopt + - wheel-build-cuopt-server + - wheel-publish-cuopt-server + - wheel-build-cuopt-sh-client + - wheel-publish-cuopt-sh-client + - docs-build + runs-on: linux-amd64-cpu4 + container: + image: python:3.12-slim + steps: + - uses: actions/checkout@v6 + with: + ref: ${{ inputs.sha }} + - name: Install dependencies + run: apt-get update && apt-get install -y --no-install-recommends curl + - name: Send build summary + env: + GITHUB_TOKEN: ${{ github.token }} + GITHUB_RUN_ID: ${{ github.run_id }} + GITHUB_REPOSITORY: ${{ github.repository }} + GITHUB_SERVER_URL: ${{ github.server_url }} + CUOPT_SLACK_WEBHOOK_URL: ${{ secrets.CUOPT_SLACK_WEBHOOK_URL }} + SLACK_BOT_TOKEN: ${{ secrets.CUOPT_SLACK_BOT_TOKEN }} + SLACK_CHANNEL_ID: ${{ secrets.CUOPT_SLACK_CHANNEL_ID }} + RAPIDS_BRANCH: ${{ inputs.branch }} + run: bash ci/build_summary.sh + build-images: needs: - wheel-publish-cuopt diff --git a/ci/build_summary.sh b/ci/build_summary.sh new file mode 100755 index 0000000000..be3e028eab --- /dev/null +++ b/ci/build_summary.sh @@ -0,0 +1,152 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +# Send a Slack notification summarizing the build workflow status. +# Queries the GitHub API for job statuses and posts a compact message. + +set -euo pipefail + +BRANCH="${RAPIDS_BRANCH:-main}" +RUN_DATE="$(date +%F)" +GITHUB_RUN_URL="${GITHUB_SERVER_URL:-https://github.com}/${GITHUB_REPOSITORY:-NVIDIA/cuopt}/actions/runs/${GITHUB_RUN_ID:-}" +SLACK_WEBHOOK_URL="${CUOPT_SLACK_WEBHOOK_URL:-}" +SLACK_BOT_TOKEN="${SLACK_BOT_TOKEN:-}" +SLACK_CHANNEL_ID="${SLACK_CHANNEL_ID:-}" + +if [ -z "${SLACK_WEBHOOK_URL}" ] && [ -z "${SLACK_BOT_TOKEN}" ]; then + echo "No Slack credentials set, skipping build summary." + exit 0 +fi + +# Fetch workflow job statuses +JOBS_JSON="" +if [ -n "${GITHUB_TOKEN:-}" ] && [ -n "${GITHUB_RUN_ID:-}" ] && [ -n "${GITHUB_REPOSITORY:-}" ]; then + echo "Fetching build job statuses from GitHub API..." + JOBS_JSON=$(curl -s -L \ + -H "Authorization: Bearer ${GITHUB_TOKEN}" \ + -H "Accept: application/vnd.github+json" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/jobs?per_page=100") +fi + +# Generate Slack payload +PAYLOAD=$(python3 -c " +import json, sys + +jobs_json = sys.argv[1] +branch = sys.argv[2] +date = sys.argv[3] +run_url = sys.argv[4] + +jobs = json.loads(jobs_json).get('jobs', []) if jobs_json else [] + +# Filter out build-summary itself and compute-matrix helpers +jobs = [j for j in jobs + if 'build-summary' not in j.get('name', '').lower() + and 'compute-matrix' not in j.get('name', '').lower()] + +# Group by workflow prefix +groups = {} +for j in jobs: + name = j.get('name', '') + prefix = name.split(' / ')[0] if ' / ' in name else name + groups.setdefault(prefix, []).append(j) + +total = len(jobs) +failed_count = sum(1 for j in jobs if j.get('conclusion') == 'failure') +passed_count = sum(1 for j in jobs if j.get('conclusion') == 'success') + +if failed_count > 0: + emoji = ':x:' + status = f'{failed_count} build job(s) failed' +else: + emoji = 
':white_check_mark:' + status = f'All {passed_count} build jobs passed' + +blocks = [] +blocks.append({ + 'type': 'header', + 'text': {'type': 'plain_text', 'text': f'cuOpt Build \u2014 {branch} \u2014 {date}', 'emoji': True}, +}) +blocks.append({ + 'type': 'section', + 'text': {'type': 'mrkdwn', 'text': f'{emoji} *{status}*'}, +}) +blocks.append({'type': 'divider'}) + +# Build status per group +lines = [] +for group_name, group_jobs in sorted(groups.items()): + g_passed = sum(1 for j in group_jobs if j.get('conclusion') == 'success') + g_failed = sum(1 for j in group_jobs if j.get('conclusion') == 'failure') + g_total = len(group_jobs) + + if g_failed > 0: + icon = ':x:' + detail = f'{g_failed}/{g_total} failed' + elif g_passed == g_total: + icon = ':white_check_mark:' + detail = f'{g_total} passed' + else: + icon = ':grey_question:' + detail = f'{g_passed}/{g_total} passed' + lines.append(f'{icon} *{group_name}* \u2014 {detail}') + +text = '\n'.join(lines) +if len(text) > 2900: + text = text[:2900] + '\n_...truncated_' +blocks.append({ + 'type': 'section', + 'text': {'type': 'mrkdwn', 'text': text}, +}) + +# Link +if run_url: + blocks.append({'type': 'divider'}) + blocks.append({ + 'type': 'context', + 'elements': [{'type': 'mrkdwn', 'text': f'<{run_url}|:github: GitHub Actions>'}], + }) + +print(json.dumps({ + 'username': 'cuOpt Build Bot', + 'icon_emoji': ':package:', + 'blocks': blocks, +})) +" "${JOBS_JSON}" "${BRANCH}" "${RUN_DATE}" "${GITHUB_RUN_URL}") + +# Send via bot token (preferred) or webhook +echo "Sending build summary to Slack..." 
+if [ -n "${SLACK_BOT_TOKEN}" ] && [ -n "${SLACK_CHANNEL_ID}" ]; then + BOT_PAYLOAD=$(python3 -c " +import json, sys +p = json.loads(sys.argv[1]) +p['channel'] = sys.argv[2] +print(json.dumps(p)) +" "${PAYLOAD}" "${SLACK_CHANNEL_ID}") + + RESPONSE=$(curl -s -X POST \ + -H "Authorization: Bearer ${SLACK_BOT_TOKEN}" \ + -H "Content-Type: application/json" \ + --data "${BOT_PAYLOAD}" \ + "https://slack.com/api/chat.postMessage") + + OK=$(echo "${RESPONSE}" | python3 -c "import json,sys; print(json.load(sys.stdin).get('ok',''))" 2>/dev/null || echo "") + if [ "${OK}" != "True" ]; then + echo "WARNING: chat.postMessage failed: ${RESPONSE}" >&2 + # Fall back to webhook + curl -s -X POST -H 'Content-type: application/json' --data "${PAYLOAD}" "${SLACK_WEBHOOK_URL}" || true + else + echo "Build summary posted to Slack." + fi +elif [ -n "${SLACK_WEBHOOK_URL}" ]; then + response=$(curl -s -X POST \ + -H 'Content-type: application/json' \ + --data "${PAYLOAD}" \ + "${SLACK_WEBHOOK_URL}") + if [ "${response}" != "ok" ]; then + echo "WARNING: Slack webhook returned: ${response}" >&2 + else + echo "Build summary posted to Slack." 
+ fi +fi diff --git a/ci/utils/send_consolidated_summary.sh b/ci/utils/send_consolidated_summary.sh index 041a3de0e6..6e3e2b0543 100755 --- a/ci/utils/send_consolidated_summary.sh +++ b/ci/utils/send_consolidated_summary.sh @@ -89,24 +89,35 @@ def make_payload(blocks): # ══════════════════════════════════════════════════════════════════════ blocks = [] +# Identify which workflows have failures (from both CI jobs and matrix grid) +failing_workflows = set() +for j in failed_ci_jobs: + prefix = j["name"].split(" / ")[0] if " / " in j["name"] else j["name"] + failing_workflows.add(prefix) +for g in grid: + if g["status"].startswith("failed"): + failing_workflows.add(g["test_type"]) +flaky_workflows = set() +for g in grid: + if g["status"] == "flaky": + flaky_workflows.add(g["test_type"]) + +has_failures = len(failing_workflows) > 0 untracked_count = len(untracked_failed) -if untracked_count > 0 or (failed_jobs > 0 and has_new): + +if has_failures and (has_new or untracked_count > 0): emoji = ":rotating_light:" - parts = [] - if untracked_count > 0: - names = [j["name"] for j in untracked_failed] - parts.append(f"{untracked_count} CI job(s) failed ({', '.join(names[:3])})") - if failed_jobs > 0 and has_new: - parts.append(f"NEW test failures in {failed_jobs} matrix job(s)") - elif failed_jobs > 0: - parts.append(f"recurring failures in {failed_jobs} matrix job(s)") - text = " + ".join(parts) + wf_list = ", ".join(sorted(failing_workflows)[:5]) + if len(failing_workflows) > 5: + wf_list += f" +{len(failing_workflows) - 5} more" + text = f"Failures in: {wf_list}" mention = "" -elif failed_jobs > 0: +elif has_failures: emoji = ":x:" - text = f"Recurring failures in {failed_jobs} matrix job(s)" + wf_list = ", ".join(sorted(failing_workflows)[:5]) + text = f"Recurring failures in: {wf_list}" mention = "" -elif flaky_jobs > 0: +elif flaky_workflows: emoji = ":large_yellow_circle:" text = "All jobs passed but flaky tests detected" mention = "" @@ -141,16 +152,28 @@ 
blocks.append({ }, }) -# Failed untracked CI jobs in main message (details in thread) -if untracked_failed: - names = [j["name"] for j in untracked_failed] - summary = f":x: *{len(untracked_failed)} CI job(s) failed:* " + ", ".join(f"`{n}`" for n in names[:5]) - if len(names) > 5: - summary += f" _+{len(names) - 5} more_" +# Per-workflow failure summary in main message +if failing_workflows: + lines = [] + for wf in sorted(failing_workflows): + # Count matrix failures for this workflow + wf_grid = [g for g in grid if g["test_type"] == wf and g["status"].startswith("failed")] + # Count CI-level failures + wf_ci = [j for j in failed_ci_jobs + if (j["name"].split(" / ")[0] if " / " in j["name"] else j["name"]) == wf] + parts = [] + if wf_grid: + parts.append(f"{len(wf_grid)} matrix job(s)") + if wf_ci and not any(not j.get("has_test_details", False) for j in wf_ci): + pass # already covered by matrix + elif wf_ci: + parts.append("CI job failed") + detail = ", ".join(parts) if parts else "failed" + lines.append(f":x: *{wf}* — {detail}") blocks.append({"type": "divider"}) blocks.append({ "type": "section", - "text": {"type": "mrkdwn", "text": summary}, + "text": {"type": "mrkdwn", "text": "\n".join(lines)}, }) # Links in main message From 4419b928d17043c2cdfc3eb16bd5bce3717e6562 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 14 Apr 2026 17:05:23 -0500 Subject: [PATCH 25/60] Add summary-only flag to test.yaml for quick nightly-summary testing Skips all test jobs when summary-only=true, so nightly-summary runs immediately without waiting for GPU runners. 
--- .github/workflows/test.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 097f607244..2bbf8105e2 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -24,9 +24,14 @@ on: description: "build_type: one of [branch, nightly, pull-request]" type: string default: nightly + summary-only: + description: "If true, skip all test jobs and run only nightly-summary" + type: boolean + default: false jobs: conda-cpp-tests: + if: ${{ !inputs.summary-only }} uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@main with: build_type: ${{ inputs.build_type }} @@ -42,6 +47,7 @@ jobs: script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} conda-python-tests: + if: ${{ !inputs.summary-only }} uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@main with: run_codecov: false @@ -58,6 +64,7 @@ jobs: script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} wheel-tests-cuopt: + if: ${{ !inputs.summary-only }} uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main with: build_type: ${{ inputs.build_type }} @@ -73,6 +80,7 @@ jobs: script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} wheel-tests-cuopt-server: + if: ${{ !inputs.summary-only }} uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main with: build_type: ${{ inputs.build_type }} @@ -88,6 +96,7 @@ jobs: script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} conda-notebook-tests: + if: ${{ !inputs.summary-only }} secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main with: From 1e711b38a819da2e1ca3ee329044bd5973a6f4e1 Mon Sep 17 00:00:00 2001 From: Ramakrishna 
Prabhu Date: Tue, 14 Apr 2026 17:08:17 -0500 Subject: [PATCH 26/60] Make dashboard self-contained and add summary-only test flag - Embed index.json and consolidated data directly into dashboard HTML during aggregation so it works on private S3 buckets without runtime fetches (no more 403 errors) - Dashboard falls back to S3 fetch if embedded data is absent - Add summary-only input to test.yaml to skip all test jobs and run only nightly-summary (avoids waiting for GPU runners when testing) --- ci/dashboard/index.html | 56 +++++++++++++++++++++-------------- ci/utils/aggregate_nightly.py | 28 ++++++++++++++++-- 2 files changed, 60 insertions(+), 24 deletions(-) diff --git a/ci/dashboard/index.html b/ci/dashboard/index.html index 9b56a7c915..fff5ca0d7c 100644 --- a/ci/dashboard/index.html +++ b/ci/dashboard/index.html @@ -223,37 +223,49 @@

cuOpt Nightly

/* Init */ /* ================================================================== */ async function init() { - // Determine S3 base URL from query param or auto-detect from location - const params = new URLSearchParams(window.location.search); - S.baseUrl = params.get('base_url') || deriveBaseUrl(); + // Use embedded data if available (injected by aggregate_nightly.py) + if (window.__EMBEDDED_INDEX__) { + S.index = window.__EMBEDDED_INDEX__; + S.embedded = true; + if (window.__EMBEDDED_CONSOLIDATED__) { + S.current = window.__EMBEDDED_CONSOLIDATED__; + } + } else { + // Fall back to fetching from S3 + const params = new URLSearchParams(window.location.search); + S.baseUrl = params.get('base_url') || deriveBaseUrl(); - if (!S.baseUrl) { - showEmpty('Set ?base_url=https://... to the S3 base URL for ci_test_reports/nightly/'); - return; - } + if (!S.baseUrl) { + showEmpty('Set ?base_url=https://... to the S3 base URL for ci_test_reports/nightly/'); + return; + } - // Ensure trailing slash - if (!S.baseUrl.endsWith('/')) S.baseUrl += '/'; + if (!S.baseUrl.endsWith('/')) S.baseUrl += '/'; - try { - // Load index - const indexResp = await fetch(S.baseUrl + 'index.json'); - if (!indexResp.ok) throw new Error(`index.json: ${indexResp.status}`); - S.index = await indexResp.json(); - } catch (e) { - showEmpty(`Failed to load index.json from ${esc(S.baseUrl)}.
${esc(e.message)}`); - return; + try { + const indexResp = await fetch(S.baseUrl + 'index.json'); + if (!indexResp.ok) throw new Error(`index.json: ${indexResp.status}`); + S.index = await indexResp.json(); + } catch (e) { + showEmpty(`Failed to load index.json from ${esc(S.baseUrl)}.
${esc(e.message)}`); + return; + } } populateDateSelector(); setupEventListeners(); - // Load most recent date - const dates = Object.keys(S.index.dates || {}).sort().reverse(); - if (dates.length > 0) { - await loadDate(dates[0]); + if (S.current) { + // Already have consolidated data from embedding + populateTestTypeFilters(); + render(); } else { - showEmpty('No nightly data available yet.'); + const dates = Object.keys(S.index.dates || {}).sort().reverse(); + if (dates.length > 0) { + await loadDate(dates[0]); + } else { + showEmpty('No nightly data available yet.'); + } } } diff --git a/ci/utils/aggregate_nightly.py b/ci/utils/aggregate_nightly.py index a0d6db4c90..2219bb85aa 100644 --- a/ci/utils/aggregate_nightly.py +++ b/ci/utils/aggregate_nightly.py @@ -704,11 +704,35 @@ def main(): output_dir, ) - # ---- Step 6: Upload dashboard ---- + # ---- Step 6: Upload dashboard (self-contained with embedded data) ---- if args.s3_dashboard_uri and args.dashboard_dir: dashboard_file = Path(args.dashboard_dir) / "index.html" if dashboard_file.exists(): - s3_upload(str(dashboard_file), args.s3_dashboard_uri) + # Read the index.json we just uploaded/created + index_path = output_dir / "index.json" + index_data = {} + if index_path.exists(): + with open(index_path) as f: + index_data = json.load(f) + + # Inject data into dashboard HTML so it works without S3 fetches + dashboard_html = dashboard_file.read_text() + inject_script = ( + "\n" + ) + # Insert before + dashboard_html = dashboard_html.replace( + "", inject_script + "" + ) + + embedded_path = output_dir / "dashboard.html" + embedded_path.write_text(dashboard_html) + s3_upload(str(embedded_path), args.s3_dashboard_uri) + print(f"Dashboard uploaded with embedded data") else: print( f"WARNING: Dashboard not found at {dashboard_file}", From cc9246e9598f42834718da75677e094967c1c6a5 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 14 Apr 2026 21:04:20 -0500 Subject: [PATCH 27/60] Show Failures tab first in 
dashboard instead of Matrix Grid --- ci/dashboard/index.html | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/dashboard/index.html b/ci/dashboard/index.html index fff5ca0d7c..511203af00 100644 --- a/ci/dashboard/index.html +++ b/ci/dashboard/index.html @@ -197,8 +197,8 @@

cuOpt Nightly

- - + + @@ -215,7 +215,7 @@

cuOpt Nightly

baseUrl: '', // Set by config or URL param index: null, // index.json data current: null, // Current day's consolidated_summary.json - activeTab: 'matrix', + activeTab: 'failures', filters: { testType: new Set(), status: 'all' }, }; From 1b45e2413135e8bfbe742341b573c8b62eb1bb1c Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 15 Apr 2026 10:17:40 -0500 Subject: [PATCH 28/60] Trim verbose Slack output: compact stats, failures-only CI status - Test totals: only show failed/flaky counts, skip passed/skipped/total - CI Workflow Status thread: only list failing workflows, one-line summary for passing ones --- ci/utils/send_consolidated_summary.sh | 52 +++++++++++---------------- 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/ci/utils/send_consolidated_summary.sh b/ci/utils/send_consolidated_summary.sh index 6e3e2b0543..0b57aa839f 100755 --- a/ci/utils/send_consolidated_summary.sh +++ b/ci/utils/send_consolidated_summary.sh @@ -128,13 +128,14 @@ else: text += f", all {passed_ci_count} CI jobs succeeded" mention = "" -stats = ( - f":white_check_mark: {totals.get('passed', 0)} passed | " - f":x: {totals.get('failed', 0)} failed | " - f":warning: {totals.get('flaky', 0)} flaky | " - f":fast_forward: {totals.get('skipped', 0)} skipped | " - f"Total: {totals.get('total', 0)}" -) +stats_parts = [] +if totals.get("failed", 0) > 0: + stats_parts.append(f":x: {totals['failed']} failed") +if totals.get("flaky", 0) > 0: + stats_parts.append(f":warning: {totals['flaky']} flaky") +if not stats_parts: + stats_parts.append(f":white_check_mark: {totals.get('total', 0)} tests passed") +stats = " | ".join(stats_parts) blocks.append({ "type": "header", @@ -269,46 +270,33 @@ if issues_by_wf: print(make_payload(wf_blocks)) -# ── Thread 2: CI Workflow Status (all jobs) ─────────────────────────── -# Shows every workflow job so new workflows are automatically visible. 
+# ── Thread 2: CI Workflow Status (only failures + summary) ──────────── if workflow_jobs: wf_groups = {} for j in workflow_jobs: prefix = j["name"].split(" / ")[0] if " / " in j["name"] else j["name"] wf_groups.setdefault(prefix, []).append(j) - ci_blocks = [] - current = "*CI Workflow Status:*\n" + failed_lines = [] + passed_count = 0 for group_name, group_jobs in sorted(wf_groups.items()): passed = sum(1 for j in group_jobs if j["conclusion"] == "success") failed = sum(1 for j in group_jobs if j["conclusion"] == "failure") total = len(group_jobs) if failed > 0: - icon = ":x:" - detail = f"{failed}/{total} failed" - elif passed == total: - icon = ":white_check_mark:" - detail = f"{total} passed" + failed_lines.append(f":x: *{group_name}* — {failed}/{total} failed") else: - icon = ":grey_question:" - detail = f"{passed}/{total} passed" - - line = f"{icon} *{group_name}* — {detail}\n" - if len(current) + len(line) > 2900: - ci_blocks.append({ - "type": "section", - "text": {"type": "mrkdwn", "text": current.rstrip()}, - }) - current = "" - current += line + passed_count += 1 - if current.strip(): - ci_blocks.append({ + if failed_lines: + text = "*Failed CI Workflows:*\n" + "\n".join(failed_lines) + if passed_count > 0: + text += f"\n_{passed_count} other workflow(s) passed_" + print(make_payload([{ "type": "section", - "text": {"type": "mrkdwn", "text": current.rstrip()}, - }) - print(make_payload(ci_blocks)) + "text": {"type": "mrkdwn", "text": text}, + }])) PYEOF ) From 78c1e38b2f6e5a791fcf0b47d479464bbf24d331 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 15 Apr 2026 13:24:43 -0500 Subject: [PATCH 29/60] Fix build summary argument list too long error Write GitHub API response to a temp file instead of passing it as a shell argument to Python. The jobs JSON for a full build matrix exceeds the OS argument length limit. 
--- ci/build_summary.sh | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/ci/build_summary.sh b/ci/build_summary.sh index be3e028eab..e8fd81a436 100755 --- a/ci/build_summary.sh +++ b/ci/build_summary.sh @@ -20,25 +20,29 @@ if [ -z "${SLACK_WEBHOOK_URL}" ] && [ -z "${SLACK_BOT_TOKEN}" ]; then fi # Fetch workflow job statuses -JOBS_JSON="" +JOBS_FILE=$(mktemp) if [ -n "${GITHUB_TOKEN:-}" ] && [ -n "${GITHUB_RUN_ID:-}" ] && [ -n "${GITHUB_REPOSITORY:-}" ]; then echo "Fetching build job statuses from GitHub API..." - JOBS_JSON=$(curl -s -L \ + curl -s -L \ -H "Authorization: Bearer ${GITHUB_TOKEN}" \ -H "Accept: application/vnd.github+json" \ - "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/jobs?per_page=100") + "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/jobs?per_page=100" \ + > "${JOBS_FILE}" +else + echo "{}" > "${JOBS_FILE}" fi # Generate Slack payload PAYLOAD=$(python3 -c " import json, sys -jobs_json = sys.argv[1] +with open(sys.argv[1]) as f: + data = json.load(f) branch = sys.argv[2] date = sys.argv[3] run_url = sys.argv[4] -jobs = json.loads(jobs_json).get('jobs', []) if jobs_json else [] +jobs = data.get('jobs', []) # Filter out build-summary itself and compute-matrix helpers jobs = [j for j in jobs @@ -113,7 +117,9 @@ print(json.dumps({ 'icon_emoji': ':package:', 'blocks': blocks, })) -" "${JOBS_JSON}" "${BRANCH}" "${RUN_DATE}" "${GITHUB_RUN_URL}") +" "${JOBS_FILE}" "${BRANCH}" "${RUN_DATE}" "${GITHUB_RUN_URL}") + +rm -f "${JOBS_FILE}" # Send via bot token (preferred) or webhook echo "Sending build summary to Slack..." 
From dd758865fc9d12327e1e45c106dfaeb585dbd0d7 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 15 Apr 2026 13:27:34 -0500 Subject: [PATCH 30/60] Add summary-only flag to build.yaml for quick build-summary testing Skips all build/publish/test/image jobs when summary-only=true, so build-summary runs immediately without waiting for runners. --- .github/workflows/build.yaml | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 910f469936..ee455a4452 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -37,6 +37,10 @@ on: If 'true', trigger the test workflow after all builds complete. type: boolean default: false + summary-only: + description: "If true, skip all build jobs and run only build-summary" + type: boolean + default: false concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -44,6 +48,7 @@ concurrency: jobs: cpp-build: + if: ${{ !inputs.summary-only }} secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@main with: @@ -53,6 +58,7 @@ jobs: sha: ${{ inputs.sha }} script: ci/build_cpp.sh python-build: + if: ${{ !inputs.summary-only }} needs: [cpp-build] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@main @@ -63,6 +69,7 @@ jobs: sha: ${{ inputs.sha }} script: ci/build_python.sh upload-conda: + if: ${{ !inputs.summary-only }} needs: [cpp-build, python-build] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@main @@ -72,6 +79,7 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} wheel-build-cuopt-mps-parser: + if: ${{ !inputs.summary-only }} secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main with: @@ -86,6 +94,7 @@ jobs: # need 1 build per Python version and arch (but CUDA version doesn't matter so choose the latest) matrix_filter: 'group_by([.ARCH, 
(.PY_VER |split(".") | map(tonumber))])|map(max_by([(.CUDA_VER|split(".")|map(tonumber))]))' wheel-publish-cuopt-mps-parser: + if: ${{ !inputs.summary-only }} needs: wheel-build-cuopt-mps-parser secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main @@ -97,6 +106,7 @@ jobs: package-name: cuopt_mps_parser package-type: python wheel-build-libcuopt: + if: ${{ !inputs.summary-only }} needs: wheel-build-cuopt-mps-parser secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main @@ -110,6 +120,7 @@ jobs: package-type: cpp matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) wheel-publish-libcuopt: + if: ${{ !inputs.summary-only }} needs: wheel-build-libcuopt secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main @@ -121,6 +132,7 @@ jobs: package-name: libcuopt package-type: cpp wheel-build-cuopt: + if: ${{ !inputs.summary-only }} needs: [wheel-build-cuopt-mps-parser, wheel-build-libcuopt] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main @@ -133,6 +145,7 @@ jobs: package-name: cuopt package-type: python wheel-publish-cuopt: + if: ${{ !inputs.summary-only }} needs: wheel-build-cuopt secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main @@ -144,6 +157,7 @@ jobs: package-name: cuopt package-type: python wheel-build-cuopt-server: + if: ${{ !inputs.summary-only }} secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main with: @@ -158,6 +172,7 @@ jobs: # Only need 1 package per CUDA major version. This selects "ARCH=amd64 + the latest supported Python, 1 job per major CUDA version". 
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) wheel-publish-cuopt-server: + if: ${{ !inputs.summary-only }} needs: wheel-build-cuopt-server secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main @@ -169,6 +184,7 @@ jobs: package-name: cuopt_server package-type: python docs-build: + if: ${{ !inputs.summary-only }} needs: [python-build] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main @@ -184,6 +200,7 @@ jobs: container_image: "rapidsai/ci-conda:26.06-latest" script: "ci/build_docs.sh" wheel-build-cuopt-sh-client: + if: ${{ !inputs.summary-only }} secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main with: @@ -199,6 +216,7 @@ jobs: # only need 1 build (noarch package): this selects amd64, oldest-supported Python, latest-supported CUDA matrix_filter: '[map(select(.ARCH == "amd64")) | min_by((.PY_VER | split(".") | map(tonumber)), (.CUDA_VER | split(".") | map(-tonumber)))]' wheel-publish-cuopt-sh-client: + if: ${{ !inputs.summary-only }} needs: wheel-build-cuopt-sh-client secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main @@ -219,7 +237,7 @@ jobs: - wheel-publish-cuopt-server - wheel-publish-cuopt-sh-client - wheel-publish-libcuopt - if: inputs.trigger-tests + if: ${{ inputs.trigger-tests && !inputs.summary-only }} runs-on: ubuntu-latest # ref: https://docs.github.com/en/actions/reference/security/secure-use#use-an-intermediate-environment-variable env: @@ -281,6 +299,7 @@ jobs: run: bash ci/build_summary.sh build-images: + if: ${{ !inputs.summary-only }} needs: - wheel-publish-cuopt - wheel-publish-cuopt-server From bd1ec6fbdd2fdaf7b26530faee32c7f8fc25aa95 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 15 Apr 2026 14:12:19 -0500 Subject: [PATCH 31/60] Show CI job 
pass/fail counts in main Slack message Use GitHub API job counts (e.g., "1/11 failed") instead of vague "failed" or "matrix job(s)" in the per-workflow failure summary. --- ci/utils/send_consolidated_summary.sh | 32 ++++++++++++++------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/ci/utils/send_consolidated_summary.sh b/ci/utils/send_consolidated_summary.sh index 0b57aa839f..bf63b48d94 100755 --- a/ci/utils/send_consolidated_summary.sh +++ b/ci/utils/send_consolidated_summary.sh @@ -153,24 +153,26 @@ blocks.append({ }, }) -# Per-workflow failure summary in main message +# Per-workflow failure summary using CI job counts from GitHub API +# Build a lookup: workflow prefix -> (failed, total) from workflow_jobs +wf_counts = {} +for j in workflow_jobs: + prefix = j["name"].split(" / ")[0] if " / " in j["name"] else j["name"] + wf_counts.setdefault(prefix, {"failed": 0, "total": 0}) + wf_counts[prefix]["total"] += 1 + if j["conclusion"] == "failure": + wf_counts[prefix]["failed"] += 1 + if failing_workflows: lines = [] for wf in sorted(failing_workflows): - # Count matrix failures for this workflow - wf_grid = [g for g in grid if g["test_type"] == wf and g["status"].startswith("failed")] - # Count CI-level failures - wf_ci = [j for j in failed_ci_jobs - if (j["name"].split(" / ")[0] if " / " in j["name"] else j["name"]) == wf] - parts = [] - if wf_grid: - parts.append(f"{len(wf_grid)} matrix job(s)") - if wf_ci and not any(not j.get("has_test_details", False) for j in wf_ci): - pass # already covered by matrix - elif wf_ci: - parts.append("CI job failed") - detail = ", ".join(parts) if parts else "failed" - lines.append(f":x: *{wf}* — {detail}") + counts = wf_counts.get(wf, {}) + f_count = counts.get("failed", 0) + t_count = counts.get("total", 0) + if t_count > 0: + lines.append(f":x: *{wf}* — {f_count}/{t_count} failed") + else: + lines.append(f":x: *{wf}* — failed") blocks.append({"type": "divider"}) blocks.append({ "type": "section", From 
609db51bb5f3bde2c4c8292b3a1deaba503a5bb4 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 15 Apr 2026 14:22:53 -0500 Subject: [PATCH 32/60] Show failures before matrix overview in consolidated HTML report --- ci/utils/aggregate_nightly.py | 55 +++++++++++++++++------------------ 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/ci/utils/aggregate_nightly.py b/ci/utils/aggregate_nightly.py index 2219bb85aa..ab27ca3365 100644 --- a/ci/utils/aggregate_nightly.py +++ b/ci/utils/aggregate_nightly.py @@ -386,34 +386,6 @@ def generate_consolidated_html(
{totals["resolved"]}
Stabilized
""") - # --- Matrix grid --- - parts.append("

Matrix Overview

") - parts.append( - "" - "" - ) - for g in agg["matrix_grid"]: - counts = g["counts"] - # Build link to per-matrix HTML report on S3 - report_link = "" - if s3_reports_prefix: - report_filename = f"{g['test_type']}-{g['matrix_label']}.html" - report_link = ( - f'View' - ) - parts.append( - f"" - f"" - f"" - f"" - f"" - f"" - f"" - f"" - ) - parts.append("
Test TypeMatrixStatusPassedFailedFlakyTotalReport
{_html_escape(g['test_type'])}{_html_escape(g['matrix_label'])}{_status_badge(g['status'])}{counts.get('passed', 0)}{counts.get('failed', 0)}{counts.get('flaky', 0)}{counts.get('total', 0)}{report_link}
") - # --- New failures --- if agg["all_new_failures"]: parts.append("

New Failures

") @@ -501,6 +473,33 @@ def generate_consolidated_html( "All tests passed across all matrices!

" ) + # --- Matrix grid (at the end) --- + parts.append("

Matrix Overview

") + parts.append( + "" + "" + ) + for g in agg["matrix_grid"]: + counts = g["counts"] + report_link = "" + if s3_reports_prefix: + report_filename = f"{g['test_type']}-{g['matrix_label']}.html" + report_link = ( + f'View' + ) + parts.append( + f"" + f"" + f"" + f"" + f"" + f"" + f"" + f"" + ) + parts.append("
Test TypeMatrixStatusPassedFailedFlakyTotalReport
{_html_escape(g['test_type'])}{_html_escape(g['matrix_label'])}{_status_badge(g['status'])}{counts.get('passed', 0)}{counts.get('failed', 0)}{counts.get('flaky', 0)}{counts.get('total', 0)}{report_link}
") + parts.append("") return "\n".join(parts) From bcde49c8549da6a36de2d2cae62d8cfa26cb2ee1 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 15 Apr 2026 14:24:55 -0500 Subject: [PATCH 33/60] Handle date switching gracefully in embedded dashboard When the dashboard has embedded data and no S3 access, show a friendly message instead of a 403 error when switching dates. The embedded dashboard always shows the latest run. --- ci/dashboard/index.html | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ci/dashboard/index.html b/ci/dashboard/index.html index 511203af00..49872e4b5f 100644 --- a/ci/dashboard/index.html +++ b/ci/dashboard/index.html @@ -259,6 +259,11 @@

cuOpt Nightly

// Already have consolidated data from embedding populateTestTypeFilters(); render(); + // Mark date selector with current date when embedded + if (S.embedded) { + const sel = document.getElementById('date-select'); + sel.value = S.current.date || ''; + } } else { const dates = Object.keys(S.index.dates || {}).sort().reverse(); if (dates.length > 0) { @@ -281,6 +286,20 @@

cuOpt Nightly

/* ================================================================== */ async function loadDate(dateStr) { const main = document.getElementById('main-content'); + + // If embedded and this is the embedded date, use embedded data + if (S.embedded && S.current && S.current.date === dateStr) { + populateTestTypeFilters(); + render(); + return; + } + + // If embedded with no S3 access, can't load other dates + if (S.embedded && !S.baseUrl) { + showEmpty(`Only the latest run (${esc(S.current?.date || 'unknown')}) is available in this view. Download per-date reports from S3 for historical data.`); + return; + } + main.innerHTML = '
Loading data for ' + esc(dateStr) + '...
'; try { From 2499eb3391c1020b99bdd3815538118a85ea052b Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 15 Apr 2026 14:27:37 -0500 Subject: [PATCH 34/60] Show branch and date as info labels in embedded dashboard Replace dropdowns with static labels when dashboard has embedded data, since switching dates/branches requires S3 access. --- ci/dashboard/index.html | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/ci/dashboard/index.html b/ci/dashboard/index.html index 49872e4b5f..c83cd87d3f 100644 --- a/ci/dashboard/index.html +++ b/ci/dashboard/index.html @@ -151,9 +151,11 @@

cuOpt Nightly

+ +
@@ -259,10 +261,16 @@

cuOpt Nightly

// Already have consolidated data from embedding populateTestTypeFilters(); render(); - // Mark date selector with current date when embedded if (S.embedded) { - const sel = document.getElementById('date-select'); - sel.value = S.current.date || ''; + // Show branch and date as info labels, hide dropdowns + document.getElementById('date-select').style.display = 'none'; + document.getElementById('branch-select').style.display = 'none'; + const dateInfo = document.getElementById('date-info'); + const branchInfo = document.getElementById('branch-info'); + dateInfo.textContent = S.current.date || 'unknown'; + dateInfo.style.display = 'block'; + branchInfo.textContent = S.current.branch || 'unknown'; + branchInfo.style.display = 'block'; } } else { const dates = Object.keys(S.index.dates || {}).sort().reverse(); From b9e6b9803f013fc793a02967f0539c9c440e25cc Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 15 Apr 2026 14:31:30 -0500 Subject: [PATCH 35/60] Add branch separation to S3 paths for multi-branch nightly support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - S3 summaries, reports, and history now include branch slug in path: summaries/{date}/{branch}/, reports/{date}/{branch}/, history/{branch}/ - Each branch gets its own dashboard at dashboard/{branch}/index.html - index.json entries keyed by date/branch instead of just date - Dashboard date selector shows "date — branch" labels - Trends filtered to current branch - Prevents main and release/26.04 nightlies from overwriting each other --- ci/dashboard/index.html | 56 ++++++++++++++++++++----------- ci/nightly_summary.sh | 11 +++--- ci/utils/aggregate_nightly.py | 15 +++++---- ci/utils/nightly_report_helper.sh | 6 ++-- 4 files changed, 55 insertions(+), 33 deletions(-) diff --git a/ci/dashboard/index.html b/ci/dashboard/index.html index c83cd87d3f..b24380b8a4 100644 --- a/ci/dashboard/index.html +++ b/ci/dashboard/index.html @@ -292,26 +292,32 @@

cuOpt Nightly

/* ================================================================== */ /* Data loading */ /* ================================================================== */ -async function loadDate(dateStr) { +async function loadDate(entryKey) { const main = document.getElementById('main-content'); - // If embedded and this is the embedded date, use embedded data - if (S.embedded && S.current && S.current.date === dateStr) { + // entryKey can be "date/branch" or just "date" (legacy) + const entry = S.index?.dates?.[entryKey] || {}; + const dateStr = entry.date || entryKey.split('/')[0] || entryKey; + const branch = entry.branch || entryKey.split('/')[1] || 'main'; + const branchSlug = branch.replace(/\//g, '-'); + + // If embedded and this matches the embedded data, use it + if (S.embedded && S.current && S.current.date === dateStr && S.current.branch === branch) { populateTestTypeFilters(); render(); return; } - // If embedded with no S3 access, can't load other dates + // If embedded with no S3 access, can't load other entries if (S.embedded && !S.baseUrl) { - showEmpty(`Only the latest run (${esc(S.current?.date || 'unknown')}) is available in this view. Download per-date reports from S3 for historical data.`); + showEmpty(`Only the latest run (${esc(S.current?.date || 'unknown')} / ${esc(S.current?.branch || 'unknown')}) is available in this view.`); return; } - main.innerHTML = '
Loading data for ' + esc(dateStr) + '...
'; + main.innerHTML = '
Loading data for ' + esc(dateStr) + ' / ' + esc(branch) + '...
'; try { - const url = S.baseUrl + 'summaries/' + dateStr + '/consolidated.json'; + const url = S.baseUrl + 'summaries/' + dateStr + '/' + branchSlug + '/consolidated.json'; const resp = await fetch(url); if (!resp.ok) throw new Error(`${resp.status}`); S.current = await resp.json(); @@ -501,21 +507,27 @@

cuOpt Nightly

function renderTrends() { if (!S.index || !S.index.dates) return '

No trend data available.

'; - const dates = Object.keys(S.index.dates).sort().slice(-14); // Last 14 days - if (!dates.length) return '

No trend data available.

'; + // Filter to current branch and sort by date + const currentBranch = S.current?.branch || 'main'; + const entries = Object.entries(S.index.dates) + .filter(([_, v]) => (v.branch || 'main') === currentBranch) + .sort((a, b) => a[0].localeCompare(b[0])) + .slice(-14); + if (!entries.length) return '

No trend data available.

'; - let html = '

Test Results — Last 14 Days

'; + let html = `

Test Results — Last 14 Runs (${esc(currentBranch)})

`; html += '
'; // Find max total for scaling let maxTotal = 1; - for (const d of dates) { - const t = S.index.dates[d].test_totals || {}; + for (const [_, val] of entries) { + const t = val.test_totals || {}; maxTotal = Math.max(maxTotal, t.total || 0); } - for (const d of dates) { - const t = S.index.dates[d].test_totals || {}; + for (const [key, val] of entries) { + const d = val.date || key.split('/')[0] || key; + const t = val.test_totals || {}; const total = t.total || 0; const passed = t.passed || 0; const failed = t.failed || 0; @@ -539,11 +551,12 @@

cuOpt Nightly

html += '
'; // Job pass rate trend - html += '

Matrix Job Pass Rate — Last 14 Days

'; + html += `

Matrix Job Pass Rate — Last 14 Runs (${esc(currentBranch)})

`; html += '
'; - for (const d of dates) { - const j = S.index.dates[d].job_summary || {}; + for (const [key, val] of entries) { + const d = val.date || key.split('/')[0] || key; + const j = val.job_summary || {}; const total = j.total || 0; const passed = j.passed || 0; const failed = j.failed || 0; @@ -638,8 +651,13 @@

cuOpt Nightly

/* ================================================================== */ function populateDateSelector() { const sel = document.getElementById('date-select'); - const dates = Object.keys(S.index.dates || {}).sort().reverse(); - sel.innerHTML = dates.map(d => ``).join(''); + const entries = Object.entries(S.index.dates || {}).sort((a, b) => b[0].localeCompare(a[0])); + sel.innerHTML = entries.map(([key, val]) => { + const date = val.date || key.split('/')[0] || key; + const branch = val.branch || key.split('/')[1] || ''; + const label = branch ? `${date} — ${branch}` : date; + return ``; + }).join(''); } function setupEventListeners() { diff --git a/ci/nightly_summary.sh b/ci/nightly_summary.sh index 647d08260a..2c50a0cd36 100755 --- a/ci/nightly_summary.sh +++ b/ci/nightly_summary.sh @@ -34,12 +34,13 @@ if [ -z "${CUOPT_DATASET_S3_URI:-}" ]; then fi S3_BASE="${CUOPT_DATASET_S3_URI}ci_test_reports/nightly" -S3_SUMMARIES_PREFIX="${S3_BASE}/summaries/${RUN_DATE}/" -S3_REPORTS_PREFIX="${S3_BASE}/reports/${RUN_DATE}/" -S3_CONSOLIDATED_JSON="${S3_BASE}/summaries/${RUN_DATE}/consolidated.json" -S3_CONSOLIDATED_HTML="${S3_BASE}/reports/${RUN_DATE}/consolidated.html" +BRANCH_SLUG=$(echo "${BRANCH}" | tr '/' '-') +S3_SUMMARIES_PREFIX="${S3_BASE}/summaries/${RUN_DATE}/${BRANCH_SLUG}/" +S3_REPORTS_PREFIX="${S3_BASE}/reports/${RUN_DATE}/${BRANCH_SLUG}/" +S3_CONSOLIDATED_JSON="${S3_BASE}/summaries/${RUN_DATE}/${BRANCH_SLUG}/consolidated.json" +S3_CONSOLIDATED_HTML="${S3_BASE}/reports/${RUN_DATE}/${BRANCH_SLUG}/consolidated.html" S3_INDEX_URI="${S3_BASE}/index.json" -S3_DASHBOARD_URI="${S3_BASE}/dashboard/index.html" +S3_DASHBOARD_URI="${S3_BASE}/dashboard/${BRANCH_SLUG}/index.html" DASHBOARD_DIR="${SCRIPT_DIR}/dashboard" # --- Query GitHub API for workflow job statuses --- diff --git a/ci/utils/aggregate_nightly.py b/ci/utils/aggregate_nightly.py index ab27ca3365..4767dc70c1 100644 --- a/ci/utils/aggregate_nightly.py +++ b/ci/utils/aggregate_nightly.py @@ -526,20 +526,23 @@ def 
update_index(s3_index_uri, date_str, consolidated, output_dir): except (json.JSONDecodeError, OSError): pass - # Add today's entry (compact — just enough for the dashboard trends) - index["dates"][date_str] = { + # Add today's entry keyed by date/branch for multi-branch support + branch = consolidated.get("branch", "main") + entry_key = f"{date_str}/{branch}" + index["dates"][entry_key] = { + "date": date_str, + "branch": branch, "job_summary": consolidated.get("job_summary", {}), "test_totals": consolidated.get("test_totals", {}), "has_new_failures": consolidated.get("has_new_failures", False), - "branch": consolidated.get("branch", ""), "github_run_url": consolidated.get("github_run_url", ""), } - # Prune to last N days + # Prune to last N entries dates_sorted = sorted(index["dates"].keys(), reverse=True) if len(dates_sorted) > MAX_INDEX_DAYS: - for old_date in dates_sorted[MAX_INDEX_DAYS:]: - del index["dates"][old_date] + for old_key in dates_sorted[MAX_INDEX_DAYS:]: + del index["dates"][old_key] # Write and upload with open(local_index, "w") as f: diff --git a/ci/utils/nightly_report_helper.sh b/ci/utils/nightly_report_helper.sh index 809b918df8..0ab568c34d 100755 --- a/ci/utils/nightly_report_helper.sh +++ b/ci/utils/nightly_report_helper.sh @@ -72,9 +72,9 @@ generate_nightly_report() { if [ -n "${CUOPT_DATASET_S3_URI:-}" ]; then local s3_base="${CUOPT_DATASET_S3_URI}ci_test_reports/nightly" - s3_history_uri="${s3_base}/history/${test_type}-${branch_slug}-${matrix_label}.json" - s3_summary_uri="${s3_base}/summaries/${run_date}/${test_type}-${matrix_label}.json" - s3_html_uri="${s3_base}/reports/${run_date}/${test_type}-${matrix_label}.html" + s3_history_uri="${s3_base}/history/${branch_slug}/${test_type}-${matrix_label}.json" + s3_summary_uri="${s3_base}/summaries/${run_date}/${branch_slug}/${test_type}-${matrix_label}.json" + s3_html_uri="${s3_base}/reports/${run_date}/${branch_slug}/${test_type}-${matrix_label}.html" fi # --- Run nightly report --- From 
9f7cef5fedf31ffb87a66187fcf5663b033896b2 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 15 Apr 2026 14:38:49 -0500 Subject: [PATCH 36/60] Fall back to legacy S3 path when branch-separated path is empty Checks if the new branch-separated summaries path has data before aggregating. Falls back to the old flat path for backward compatibility with summaries uploaded before the branch separation. --- ci/nightly_summary.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ci/nightly_summary.sh b/ci/nightly_summary.sh index 2c50a0cd36..7cbc8a1c51 100755 --- a/ci/nightly_summary.sh +++ b/ci/nightly_summary.sh @@ -57,6 +57,15 @@ else echo "{}" > "${WORKFLOW_JOBS_JSON}" fi +# Check if branch-separated path has data; fall back to legacy path if empty +S3_SUMMARIES_LEGACY="${S3_BASE}/summaries/${RUN_DATE}/" +SUMMARY_COUNT=$(aws s3 ls "${S3_SUMMARIES_PREFIX}" 2>/dev/null | wc -l || echo "0") +if [ "${SUMMARY_COUNT}" -eq 0 ]; then + echo "No summaries at branch-separated path, falling back to legacy path" + S3_SUMMARIES_PREFIX="${S3_SUMMARIES_LEGACY}" + S3_REPORTS_PREFIX="${S3_BASE}/reports/${RUN_DATE}/" +fi + echo "Aggregating nightly summaries from ${S3_SUMMARIES_PREFIX}" python3 "${SCRIPT_DIR}/utils/aggregate_nightly.py" \ From 889cc6091095e6d7a7c5917fbdf06fd91169ac2f Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 15 Apr 2026 14:52:23 -0500 Subject: [PATCH 37/60] Pass date through to nightly-summary for correct S3 path lookup The nightly-summary was hardcoding today's date, but summaries on S3 are keyed by the date they were created. When re-running against earlier data, the date must match. Now uses the date input from the workflow, falling back to today if not provided. 
--- .github/workflows/nightly-summary.yaml | 8 ++++++++ .github/workflows/test.yaml | 1 + ci/nightly_summary.sh | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/nightly-summary.yaml b/.github/workflows/nightly-summary.yaml index c286466937..724d55636e 100644 --- a/.github/workflows/nightly-summary.yaml +++ b/.github/workflows/nightly-summary.yaml @@ -20,6 +20,10 @@ on: required: true type: string default: nightly + date: + description: "Date (YYYY-MM-DD) for this run. Defaults to today." + required: false + type: string workflow_call: inputs: branch: @@ -31,6 +35,9 @@ on: build_type: required: true type: string + date: + required: false + type: string secrets: CUOPT_DATASET_S3_URI: required: true @@ -68,6 +75,7 @@ jobs: CUOPT_SLACK_CHANNEL_ID: ${{ secrets.CUOPT_SLACK_CHANNEL_ID }} RAPIDS_BUILD_TYPE: ${{ inputs.build_type }} RAPIDS_BRANCH: ${{ inputs.branch }} + RUN_DATE: ${{ inputs.date }} GITHUB_TOKEN: ${{ github.token }} GITHUB_RUN_ID: ${{ github.run_id }} GITHUB_REPOSITORY: ${{ github.repository }} diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 2bbf8105e2..4fa51ade1b 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -121,6 +121,7 @@ jobs: branch: ${{ inputs.branch }} sha: ${{ inputs.sha }} build_type: ${{ inputs.build_type }} + date: ${{ inputs.date }} secrets: CUOPT_DATASET_S3_URI: ${{ secrets.CUOPT_DATASET_S3_URI }} CUOPT_AWS_ACCESS_KEY_ID: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} diff --git a/ci/nightly_summary.sh b/ci/nightly_summary.sh index 7cbc8a1c51..2494a22704 100755 --- a/ci/nightly_summary.sh +++ b/ci/nightly_summary.sh @@ -22,7 +22,7 @@ SCRIPT_DIR="$(dirname "$(realpath "${BASH_SOURCE[0]}")")" OUTPUT_DIR="${PWD}/aggregate-output" mkdir -p "${OUTPUT_DIR}" -RUN_DATE="$(date +%F)" +RUN_DATE="${RUN_DATE:-$(date +%F)}" BRANCH="${RAPIDS_BRANCH:-main}" 
GITHUB_RUN_URL="${GITHUB_SERVER_URL:-https://github.com}/${GITHUB_REPOSITORY:-NVIDIA/cuopt}/actions/runs/${GITHUB_RUN_ID:-}" From f30a4e2c708919955c72550a997beab2955d4e46 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 15 Apr 2026 15:19:16 -0500 Subject: [PATCH 38/60] Move AWS credential mapping before S3 fallback check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The aws s3 ls command in the legacy path fallback needs credentials to access the private bucket. Moving CUOPT_AWS_* → AWS_* mapping to the top of the script so all aws CLI calls have credentials. --- ci/nightly_summary.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ci/nightly_summary.sh b/ci/nightly_summary.sh index 2494a22704..9fab77ff09 100755 --- a/ci/nightly_summary.sh +++ b/ci/nightly_summary.sh @@ -27,6 +27,11 @@ BRANCH="${RAPIDS_BRANCH:-main}" GITHUB_RUN_URL="${GITHUB_SERVER_URL:-https://github.com}/${GITHUB_REPOSITORY:-NVIDIA/cuopt}/actions/runs/${GITHUB_RUN_ID:-}" +# Map CUOPT_AWS_* to standard AWS env vars for the aws CLI +export AWS_ACCESS_KEY_ID="${CUOPT_AWS_ACCESS_KEY_ID:-${AWS_ACCESS_KEY_ID:-}}" +export AWS_SECRET_ACCESS_KEY="${CUOPT_AWS_SECRET_ACCESS_KEY:-${AWS_SECRET_ACCESS_KEY:-}}" +unset AWS_SESSION_TOKEN + if [ -z "${CUOPT_DATASET_S3_URI:-}" ]; then echo "WARNING: CUOPT_DATASET_S3_URI is not set. Skipping nightly aggregation." >&2 echo "The per-matrix reports (uploaded by individual test jobs) are still available on S3." 
@@ -83,11 +88,6 @@ python3 "${SCRIPT_DIR}/utils/aggregate_nightly.py" \ --workflow-jobs "${WORKFLOW_JOBS_JSON}" # --- Generate presigned URLs for reports (7-day expiry) --- -# Map CUOPT_AWS_* to standard AWS env vars for the aws CLI -export AWS_ACCESS_KEY_ID="${CUOPT_AWS_ACCESS_KEY_ID:-${AWS_ACCESS_KEY_ID:-}}" -export AWS_SECRET_ACCESS_KEY="${CUOPT_AWS_SECRET_ACCESS_KEY:-${AWS_SECRET_ACCESS_KEY:-}}" -unset AWS_SESSION_TOKEN - PRESIGN_EXPIRY=604800 PRESIGNED_HTML=$(aws s3 presign "${S3_CONSOLIDATED_HTML}" --expires-in "${PRESIGN_EXPIRY}" 2>&1) || { echo "WARNING: Failed to generate presigned URL for report: ${PRESIGNED_HTML}" >&2 From 404263c0165ecf50095ede250685eedf6672a3cf Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 15 Apr 2026 15:26:50 -0500 Subject: [PATCH 39/60] Exclude consolidated.json from fallback path check Previous empty runs uploaded consolidated.json to the branch path, causing the fallback to think data exists. Now only counts actual per-matrix summary files when deciding whether to fall back. 
--- ci/nightly_summary.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/nightly_summary.sh b/ci/nightly_summary.sh index 9fab77ff09..c8708c4777 100755 --- a/ci/nightly_summary.sh +++ b/ci/nightly_summary.sh @@ -62,11 +62,11 @@ else echo "{}" > "${WORKFLOW_JOBS_JSON}" fi -# Check if branch-separated path has data; fall back to legacy path if empty +# Check if branch-separated path has per-matrix summaries (exclude consolidated.json) S3_SUMMARIES_LEGACY="${S3_BASE}/summaries/${RUN_DATE}/" -SUMMARY_COUNT=$(aws s3 ls "${S3_SUMMARIES_PREFIX}" 2>/dev/null | wc -l || echo "0") +SUMMARY_COUNT=$(aws s3 ls "${S3_SUMMARIES_PREFIX}" 2>/dev/null | grep -v consolidated | grep -c '\.json' || echo "0") if [ "${SUMMARY_COUNT}" -eq 0 ]; then - echo "No summaries at branch-separated path, falling back to legacy path" + echo "No per-matrix summaries at branch path (${S3_SUMMARIES_PREFIX}), falling back to legacy path" S3_SUMMARIES_PREFIX="${S3_SUMMARIES_LEGACY}" S3_REPORTS_PREFIX="${S3_BASE}/reports/${RUN_DATE}/" fi From c90e73c0e8d35adbc640ca592bcd32f537ba4fa7 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 15 Apr 2026 15:30:11 -0500 Subject: [PATCH 40/60] Remove legacy S3 path fallback Branch-separated paths are now the only path structure. A full build+test run will populate the new paths. 
--- ci/nightly_summary.sh | 8 -------- 1 file changed, 8 deletions(-) diff --git a/ci/nightly_summary.sh b/ci/nightly_summary.sh index c8708c4777..7e77fa5052 100755 --- a/ci/nightly_summary.sh +++ b/ci/nightly_summary.sh @@ -62,14 +62,6 @@ else echo "{}" > "${WORKFLOW_JOBS_JSON}" fi -# Check if branch-separated path has per-matrix summaries (exclude consolidated.json) -S3_SUMMARIES_LEGACY="${S3_BASE}/summaries/${RUN_DATE}/" -SUMMARY_COUNT=$(aws s3 ls "${S3_SUMMARIES_PREFIX}" 2>/dev/null | grep -v consolidated | grep -c '\.json' || echo "0") -if [ "${SUMMARY_COUNT}" -eq 0 ]; then - echo "No per-matrix summaries at branch path (${S3_SUMMARIES_PREFIX}), falling back to legacy path" - S3_SUMMARIES_PREFIX="${S3_SUMMARIES_LEGACY}" - S3_REPORTS_PREFIX="${S3_BASE}/reports/${RUN_DATE}/" -fi echo "Aggregating nightly summaries from ${S3_SUMMARIES_PREFIX}" From 54d01fddf9a82bb0cec5ddfcfabc0f133c01d367 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 15 Apr 2026 16:53:30 -0500 Subject: [PATCH 41/60] Add tests and build-images to build-summary needs Build summary should wait for and report on all jobs including the test trigger and image builds. --- .github/workflows/build.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index ee455a4452..414e19e977 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -277,6 +277,8 @@ jobs: - wheel-build-cuopt-sh-client - wheel-publish-cuopt-sh-client - docs-build + - tests + - build-images runs-on: linux-amd64-cpu4 container: image: python:3.12-slim From b7efe3b1882cda811b48b5a247507cc83719c263 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 15 Apr 2026 16:55:49 -0500 Subject: [PATCH 42/60] Simplify build-summary needs to leaf jobs only Only need to depend on tests, build-images, and docs-build since they transitively depend on all upstream build/publish jobs. 
--- .github/workflows/build.yaml | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 414e19e977..3ba0edd8c1 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -263,22 +263,9 @@ jobs: build-summary: if: ${{ always() && (inputs.build_type == 'nightly') }} needs: - - cpp-build - - python-build - - upload-conda - - wheel-build-cuopt-mps-parser - - wheel-publish-cuopt-mps-parser - - wheel-build-libcuopt - - wheel-publish-libcuopt - - wheel-build-cuopt - - wheel-publish-cuopt - - wheel-build-cuopt-server - - wheel-publish-cuopt-server - - wheel-build-cuopt-sh-client - - wheel-publish-cuopt-sh-client - - docs-build - tests - build-images + - docs-build runs-on: linux-amd64-cpu4 container: image: python:3.12-slim From f72f57da67a1c34040b540cf96575ddf56577f8d Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 16 Apr 2026 10:14:07 -0500 Subject: [PATCH 43/60] Remove duplicate failure list and simplify status text - Status line now says "N workflow(s) with failures" instead of listing workflow names (the breakdown is right below) - Remove "Failed CI Workflows" thread reply since the main message already shows the same per-workflow counts --- ci/utils/send_consolidated_summary.sh | 35 ++------------------------- 1 file changed, 2 insertions(+), 33 deletions(-) diff --git a/ci/utils/send_consolidated_summary.sh b/ci/utils/send_consolidated_summary.sh index bf63b48d94..195a7d5797 100755 --- a/ci/utils/send_consolidated_summary.sh +++ b/ci/utils/send_consolidated_summary.sh @@ -107,15 +107,11 @@ untracked_count = len(untracked_failed) if has_failures and (has_new or untracked_count > 0): emoji = ":rotating_light:" - wf_list = ", ".join(sorted(failing_workflows)[:5]) - if len(failing_workflows) > 5: - wf_list += f" +{len(failing_workflows) - 5} more" - text = f"Failures in: {wf_list}" + text = f"{len(failing_workflows)} workflow(s) with failures" 
mention = "" elif has_failures: emoji = ":x:" - wf_list = ", ".join(sorted(failing_workflows)[:5]) - text = f"Recurring failures in: {wf_list}" + text = f"Recurring failures in {len(failing_workflows)} workflow(s)" mention = "" elif flaky_workflows: emoji = ":large_yellow_circle:" @@ -272,33 +268,6 @@ if issues_by_wf: print(make_payload(wf_blocks)) -# ── Thread 2: CI Workflow Status (only failures + summary) ──────────── -if workflow_jobs: - wf_groups = {} - for j in workflow_jobs: - prefix = j["name"].split(" / ")[0] if " / " in j["name"] else j["name"] - wf_groups.setdefault(prefix, []).append(j) - - failed_lines = [] - passed_count = 0 - for group_name, group_jobs in sorted(wf_groups.items()): - passed = sum(1 for j in group_jobs if j["conclusion"] == "success") - failed = sum(1 for j in group_jobs if j["conclusion"] == "failure") - total = len(group_jobs) - - if failed > 0: - failed_lines.append(f":x: *{group_name}* — {failed}/{total} failed") - else: - passed_count += 1 - - if failed_lines: - text = "*Failed CI Workflows:*\n" + "\n".join(failed_lines) - if passed_count > 0: - text += f"\n_{passed_count} other workflow(s) passed_" - print(make_payload([{ - "type": "section", - "text": {"type": "mrkdwn", "text": text}, - }])) PYEOF ) From 967483de5f7fd33b704db6530dfba117f73f3ac6 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 16 Apr 2026 13:14:37 -0500 Subject: [PATCH 44/60] Fix S3 path mismatch: per-matrix summaries use flat paths The rapidsai shared workflows set RAPIDS_BRANCH to 'main' inside test containers regardless of the branch input. Per-matrix summaries are uploaded to flat date-based paths (summaries/{date}/), not branch-separated ones. Only nightly-summary outputs (consolidated report, dashboard) use branch-separated paths. Reverts nightly_report_helper.sh to original flat paths and updates nightly_summary.sh to read from flat paths while writing outputs to branch-separated paths. 
--- ci/nightly_summary.sh | 19 +++++++++++++++++-- ci/utils/nightly_report_helper.sh | 6 +++--- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/ci/nightly_summary.sh b/ci/nightly_summary.sh index 7e77fa5052..22c63f9742 100755 --- a/ci/nightly_summary.sh +++ b/ci/nightly_summary.sh @@ -40,8 +40,11 @@ fi S3_BASE="${CUOPT_DATASET_S3_URI}ci_test_reports/nightly" BRANCH_SLUG=$(echo "${BRANCH}" | tr '/' '-') -S3_SUMMARIES_PREFIX="${S3_BASE}/summaries/${RUN_DATE}/${BRANCH_SLUG}/" -S3_REPORTS_PREFIX="${S3_BASE}/reports/${RUN_DATE}/${BRANCH_SLUG}/" +# Per-matrix summaries are uploaded by rapidsai shared workflows which use +# a flat date-based path (RAPIDS_BRANCH inside those containers is always "main"). +# Only our outputs (consolidated, dashboard) use branch-separated paths. +S3_SUMMARIES_PREFIX="${S3_BASE}/summaries/${RUN_DATE}/" +S3_REPORTS_PREFIX="${S3_BASE}/reports/${RUN_DATE}/" S3_CONSOLIDATED_JSON="${S3_BASE}/summaries/${RUN_DATE}/${BRANCH_SLUG}/consolidated.json" S3_CONSOLIDATED_HTML="${S3_BASE}/reports/${RUN_DATE}/${BRANCH_SLUG}/consolidated.html" S3_INDEX_URI="${S3_BASE}/index.json" @@ -63,6 +66,18 @@ else fi +echo "RUN_DATE=${RUN_DATE}, BRANCH=${BRANCH}, BRANCH_SLUG=${BRANCH_SLUG}" +echo "Listing S3 summaries at ${S3_SUMMARIES_PREFIX}:" +aws s3 ls "${S3_SUMMARIES_PREFIX}" 2>&1 || echo "(no files or access error)" +# Diagnostic: show what's on S3 for this date +echo "=== S3 diagnostics ===" +echo "RUN_DATE=${RUN_DATE} BRANCH=${BRANCH} BRANCH_SLUG=${BRANCH_SLUG}" +echo "Looking for summaries at: ${S3_SUMMARIES_PREFIX}" +aws s3 ls "${S3_SUMMARIES_PREFIX}" 2>&1 | head -5 || true +echo "All summaries for ${RUN_DATE}:" +aws s3 ls "${S3_BASE}/summaries/${RUN_DATE}/" 2>&1 | head -10 || true +echo "=== End diagnostics ===" + echo "Aggregating nightly summaries from ${S3_SUMMARIES_PREFIX}" python3 "${SCRIPT_DIR}/utils/aggregate_nightly.py" \ diff --git a/ci/utils/nightly_report_helper.sh b/ci/utils/nightly_report_helper.sh index 0ab568c34d..809b918df8 
100755 --- a/ci/utils/nightly_report_helper.sh +++ b/ci/utils/nightly_report_helper.sh @@ -72,9 +72,9 @@ generate_nightly_report() { if [ -n "${CUOPT_DATASET_S3_URI:-}" ]; then local s3_base="${CUOPT_DATASET_S3_URI}ci_test_reports/nightly" - s3_history_uri="${s3_base}/history/${branch_slug}/${test_type}-${matrix_label}.json" - s3_summary_uri="${s3_base}/summaries/${run_date}/${branch_slug}/${test_type}-${matrix_label}.json" - s3_html_uri="${s3_base}/reports/${run_date}/${branch_slug}/${test_type}-${matrix_label}.html" + s3_history_uri="${s3_base}/history/${test_type}-${branch_slug}-${matrix_label}.json" + s3_summary_uri="${s3_base}/summaries/${run_date}/${test_type}-${matrix_label}.json" + s3_html_uri="${s3_base}/reports/${run_date}/${test_type}-${matrix_label}.html" fi # --- Run nightly report --- From 285cb9f365868421c35dd0e0bbef438db309062f Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 16 Apr 2026 13:28:23 -0500 Subject: [PATCH 45/60] Fix s3_list to recursively find summaries in subdirectories The per-matrix summaries may be in branch subdirectories under the date prefix (e.g., summaries/2026-04-16/main/). The non-recursive aws s3 ls only returned directory prefixes, not actual files. Now uses --recursive to find all JSON files regardless of nesting. --- ci/utils/s3_helpers.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/ci/utils/s3_helpers.py b/ci/utils/s3_helpers.py index be1d2c872b..54e8b96d21 100644 --- a/ci/utils/s3_helpers.py +++ b/ci/utils/s3_helpers.py @@ -91,11 +91,13 @@ def s3_upload(local_path, s3_uri): def s3_list(s3_prefix): - """List objects under an S3 prefix. Returns list of S3 URIs.""" + """List objects under an S3 prefix (recursive). 
Returns list of S3 URIs.""" env = s3_env() + # Extract bucket and prefix from s3_prefix for reconstructing full URIs + # s3_prefix looks like "s3://bucket/path/to/prefix/" try: result = subprocess.run( - ["aws", "s3", "ls", s3_prefix], + ["aws", "s3", "ls", "--recursive", s3_prefix], env=env, check=True, capture_output=True, @@ -105,9 +107,18 @@ def s3_list(s3_prefix): print(f"WARNING: S3 ls failed: {exc}", file=sys.stderr) return [] + # --recursive output format: "2026-04-16 12:00:00 1234 path/to/file.json" + # We need to reconstruct full S3 URIs from the key paths + # Parse bucket from s3_prefix + if not s3_prefix.startswith("s3://"): + return [] + without_scheme = s3_prefix[5:] # remove "s3://" + bucket = without_scheme.split("/")[0] + base_uri = f"s3://{bucket}/" + uris = [] for line in result.stdout.strip().splitlines(): - parts = line.split() - if parts: - uris.append(f"{s3_prefix}{parts[-1]}") + parts = line.split(None, 3) # date, time, size, key + if len(parts) == 4: + uris.append(f"{base_uri}{parts[3]}") return uris From 94067f9707e7d664dc4b47f0d06847122eae3eba Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 16 Apr 2026 13:33:18 -0500 Subject: [PATCH 46/60] Restore branch-separated paths for multi-branch nightly support Per-matrix summaries use branch subdirectories so main and release branches don't overwrite each other. For production nightlies, RAPIDS_BRANCH matches the branch input. s3_list is recursive so it handles any subdirectory structure. 
--- ci/nightly_summary.sh | 11 ++++++----- ci/utils/nightly_report_helper.sh | 6 +++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/ci/nightly_summary.sh b/ci/nightly_summary.sh index 22c63f9742..3ad9210486 100755 --- a/ci/nightly_summary.sh +++ b/ci/nightly_summary.sh @@ -40,11 +40,12 @@ fi S3_BASE="${CUOPT_DATASET_S3_URI}ci_test_reports/nightly" BRANCH_SLUG=$(echo "${BRANCH}" | tr '/' '-') -# Per-matrix summaries are uploaded by rapidsai shared workflows which use -# a flat date-based path (RAPIDS_BRANCH inside those containers is always "main"). -# Only our outputs (consolidated, dashboard) use branch-separated paths. -S3_SUMMARIES_PREFIX="${S3_BASE}/summaries/${RUN_DATE}/" -S3_REPORTS_PREFIX="${S3_BASE}/reports/${RUN_DATE}/" +# Per-matrix summaries are uploaded by test jobs under summaries/{date}/{branch}/. +# For production nightlies (main, release/*), RAPIDS_BRANCH matches the branch input. +# For feature branch testing, RAPIDS_BRANCH may default to "main" in rapidsai containers, +# so we search the date prefix recursively (s3_list handles this). 
+S3_SUMMARIES_PREFIX="${S3_BASE}/summaries/${RUN_DATE}/${BRANCH_SLUG}/" +S3_REPORTS_PREFIX="${S3_BASE}/reports/${RUN_DATE}/${BRANCH_SLUG}/" S3_CONSOLIDATED_JSON="${S3_BASE}/summaries/${RUN_DATE}/${BRANCH_SLUG}/consolidated.json" S3_CONSOLIDATED_HTML="${S3_BASE}/reports/${RUN_DATE}/${BRANCH_SLUG}/consolidated.html" S3_INDEX_URI="${S3_BASE}/index.json" diff --git a/ci/utils/nightly_report_helper.sh b/ci/utils/nightly_report_helper.sh index 809b918df8..0ab568c34d 100755 --- a/ci/utils/nightly_report_helper.sh +++ b/ci/utils/nightly_report_helper.sh @@ -72,9 +72,9 @@ generate_nightly_report() { if [ -n "${CUOPT_DATASET_S3_URI:-}" ]; then local s3_base="${CUOPT_DATASET_S3_URI}ci_test_reports/nightly" - s3_history_uri="${s3_base}/history/${test_type}-${branch_slug}-${matrix_label}.json" - s3_summary_uri="${s3_base}/summaries/${run_date}/${test_type}-${matrix_label}.json" - s3_html_uri="${s3_base}/reports/${run_date}/${test_type}-${matrix_label}.html" + s3_history_uri="${s3_base}/history/${branch_slug}/${test_type}-${matrix_label}.json" + s3_summary_uri="${s3_base}/summaries/${run_date}/${branch_slug}/${test_type}-${matrix_label}.json" + s3_html_uri="${s3_base}/reports/${run_date}/${branch_slug}/${test_type}-${matrix_label}.html" fi # --- Run nightly report --- From 952b8f64b8cc19e1b2ab45ace9f402370736ed63 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 16 Apr 2026 13:36:58 -0500 Subject: [PATCH 47/60] Remove unused status filter from dashboard sidebar The status filter only applied to the Matrix Grid tab which is no longer the default view. Tab navigation (Failures, Flaky, etc.) already serves as the status filter. --- ci/dashboard/index.html | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/ci/dashboard/index.html b/ci/dashboard/index.html index b24380b8a4..73329dea0c 100644 --- a/ci/dashboard/index.html +++ b/ci/dashboard/index.html @@ -163,16 +163,6 @@

cuOpt Nightly

-
- -
- All - New Fail - Recurring - Flaky - Passed -
-
Passed
@@ -665,16 +655,6 @@

cuOpt Nightly

loadDate(e.target.value); }); - // Status filter chips - for (const chip of document.querySelectorAll('#status-filters .filter-chip')) { - chip.addEventListener('click', () => { - document.querySelectorAll('#status-filters .filter-chip') - .forEach(c => c.classList.remove('active')); - chip.classList.add('active'); - S.filters.status = chip.dataset.status; - renderTab(S.activeTab); - }); - } } function statusBadge(status) { From 4cfa8860a3d1530950822ffaf7d58099327c7f62 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 16 Apr 2026 13:50:37 -0500 Subject: [PATCH 48/60] Add fallback S3 prefix for cross-branch summary lookup When the branch-specific summaries path is empty (e.g., feature branch testing where RAPIDS_BRANCH defaults to main in rapidsai containers), falls back to searching the date-level prefix to find summaries uploaded by test jobs. Ensures nightly-summary always finds data from the same run regardless of RAPIDS_BRANCH mismatch. --- ci/nightly_summary.sh | 16 +++++----------- ci/utils/aggregate_nightly.py | 26 ++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/ci/nightly_summary.sh b/ci/nightly_summary.sh index 3ad9210486..04ef08682b 100755 --- a/ci/nightly_summary.sh +++ b/ci/nightly_summary.sh @@ -67,22 +67,16 @@ else fi -echo "RUN_DATE=${RUN_DATE}, BRANCH=${BRANCH}, BRANCH_SLUG=${BRANCH_SLUG}" -echo "Listing S3 summaries at ${S3_SUMMARIES_PREFIX}:" -aws s3 ls "${S3_SUMMARIES_PREFIX}" 2>&1 || echo "(no files or access error)" -# Diagnostic: show what's on S3 for this date -echo "=== S3 diagnostics ===" -echo "RUN_DATE=${RUN_DATE} BRANCH=${BRANCH} BRANCH_SLUG=${BRANCH_SLUG}" -echo "Looking for summaries at: ${S3_SUMMARIES_PREFIX}" -aws s3 ls "${S3_SUMMARIES_PREFIX}" 2>&1 | head -5 || true -echo "All summaries for ${RUN_DATE}:" -aws s3 ls "${S3_BASE}/summaries/${RUN_DATE}/" 2>&1 | head -10 || true -echo "=== End diagnostics ===" +# Fallback: search the date-level prefix if branch-specific path is empty. 
+# This handles the case where RAPIDS_BRANCH in rapidsai containers differs +# from the branch input (e.g., feature branch testing where RAPIDS_BRANCH=main). +S3_SUMMARIES_FALLBACK="${S3_BASE}/summaries/${RUN_DATE}/" echo "Aggregating nightly summaries from ${S3_SUMMARIES_PREFIX}" python3 "${SCRIPT_DIR}/utils/aggregate_nightly.py" \ --s3-summaries-prefix "${S3_SUMMARIES_PREFIX}" \ + --s3-summaries-fallback "${S3_SUMMARIES_FALLBACK}" \ --s3-reports-prefix "${S3_REPORTS_PREFIX}" \ --s3-output-uri "${S3_CONSOLIDATED_JSON}" \ --s3-html-output-uri "${S3_CONSOLIDATED_HTML}" \ diff --git a/ci/utils/aggregate_nightly.py b/ci/utils/aggregate_nightly.py index 4767dc70c1..04989a4846 100644 --- a/ci/utils/aggregate_nightly.py +++ b/ci/utils/aggregate_nightly.py @@ -38,8 +38,11 @@ # --------------------------------------------------------------------------- -def download_summaries(s3_prefix, local_dir): +def download_summaries(s3_prefix, local_dir, s3_fallback_prefix=""): """Download all JSON summaries from S3 prefix into local_dir. + If s3_fallback_prefix is set and no summaries found at s3_prefix, + retries with the fallback (used when RAPIDS_BRANCH in rapidsai + containers doesn't match the branch input). 
Returns list of loaded summary dicts.""" local_dir = Path(local_dir) local_dir.mkdir(parents=True, exist_ok=True) @@ -49,6 +52,18 @@ def download_summaries(s3_prefix, local_dir): u for u in uris if u.endswith(".json") and not u.endswith("/consolidated.json") ] + + # Fallback: search the parent date prefix if branch-specific path is empty + if not json_uris and s3_fallback_prefix and s3_fallback_prefix != s3_prefix: + print(f"No summaries at {s3_prefix}, trying fallback: {s3_fallback_prefix}") + uris = s3_list(s3_fallback_prefix) + json_uris = [ + u for u in uris + if u.endswith(".json") and not u.endswith("/consolidated.json") + ] + if json_uris: + s3_prefix = s3_fallback_prefix + print(f"Found {len(json_uris)} summary file(s) at {s3_prefix}") summaries = [] @@ -567,6 +582,11 @@ def main(): default="", help="S3 prefix for per-matrix JSON summaries (e.g., s3://bucket/.../summaries/2026-04-13/)", ) + parser.add_argument( + "--s3-summaries-fallback", + default="", + help="Fallback S3 prefix if no summaries found at primary prefix", + ) parser.add_argument( "--s3-reports-prefix", default="", @@ -633,7 +653,9 @@ def main(): summaries = load_local_summaries(args.local_summaries_dir) elif args.s3_summaries_prefix: download_dir = output_dir / "downloaded_summaries" - summaries = download_summaries(args.s3_summaries_prefix, download_dir) + summaries = download_summaries( + args.s3_summaries_prefix, download_dir, args.s3_summaries_fallback + ) else: print( "ERROR: Provide --s3-summaries-prefix or --local-summaries-dir", From 7b5c12ef1045f9522c15cdc9f6b780c8c6fa19ea Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 16 Apr 2026 15:21:21 -0500 Subject: [PATCH 49/60] Add CUOPT_S3_URI as common S3 base for reports New repo secret CUOPT_S3_URI (e.g., s3://cuopt-datasets/) is the bucket root. Scripts append ci_test_reports/nightly/ to build full paths, keeping reports outside ci_datasets/. Replaces the previous use of CUOPT_DATASET_S3_URI for report paths. 
--- .github/workflows/nightly-summary.yaml | 4 ++-- .github/workflows/test.yaml | 10 +++++++++- ci/nightly_summary.sh | 7 +++---- ci/utils/nightly_report_helper.sh | 4 ++-- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/.github/workflows/nightly-summary.yaml b/.github/workflows/nightly-summary.yaml index 724d55636e..1bc3369c41 100644 --- a/.github/workflows/nightly-summary.yaml +++ b/.github/workflows/nightly-summary.yaml @@ -39,7 +39,7 @@ on: required: false type: string secrets: - CUOPT_DATASET_S3_URI: + CUOPT_S3_URI: required: true CUOPT_AWS_ACCESS_KEY_ID: required: true @@ -67,7 +67,7 @@ jobs: pip install awscli - name: Run nightly summary env: - CUOPT_DATASET_S3_URI: ${{ secrets.CUOPT_DATASET_S3_URI }} + CUOPT_S3_URI: ${{ secrets.CUOPT_S3_URI }} CUOPT_AWS_ACCESS_KEY_ID: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} CUOPT_AWS_SECRET_ACCESS_KEY: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} CUOPT_SLACK_WEBHOOK_URL: ${{ secrets.CUOPT_SLACK_WEBHOOK_URL }} diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 4fa51ade1b..a7bafa5cbd 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -46,6 +46,8 @@ jobs: script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} + script-env-secret-4-key: CUOPT_S3_URI + script-env-secret-4-value: ${{ secrets.CUOPT_S3_URI }} conda-python-tests: if: ${{ !inputs.summary-only }} uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@main @@ -63,6 +65,8 @@ jobs: script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} + script-env-secret-4-key: CUOPT_S3_URI + script-env-secret-4-value: ${{ secrets.CUOPT_S3_URI }} wheel-tests-cuopt: if: ${{ !inputs.summary-only }} uses: 
rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main @@ -79,6 +83,8 @@ jobs: script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} + script-env-secret-4-key: CUOPT_S3_URI + script-env-secret-4-value: ${{ secrets.CUOPT_S3_URI }} wheel-tests-cuopt-server: if: ${{ !inputs.summary-only }} uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main @@ -95,6 +101,8 @@ jobs: script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} + script-env-secret-4-key: CUOPT_S3_URI + script-env-secret-4-value: ${{ secrets.CUOPT_S3_URI }} conda-notebook-tests: if: ${{ !inputs.summary-only }} secrets: inherit @@ -123,7 +131,7 @@ jobs: build_type: ${{ inputs.build_type }} date: ${{ inputs.date }} secrets: - CUOPT_DATASET_S3_URI: ${{ secrets.CUOPT_DATASET_S3_URI }} + CUOPT_S3_URI: ${{ secrets.CUOPT_S3_URI }} CUOPT_AWS_ACCESS_KEY_ID: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} CUOPT_AWS_SECRET_ACCESS_KEY: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} CUOPT_SLACK_WEBHOOK_URL: ${{ secrets.CUOPT_SLACK_WEBHOOK_URL }} diff --git a/ci/nightly_summary.sh b/ci/nightly_summary.sh index 04ef08682b..5cb0b0b639 100755 --- a/ci/nightly_summary.sh +++ b/ci/nightly_summary.sh @@ -32,13 +32,12 @@ export AWS_ACCESS_KEY_ID="${CUOPT_AWS_ACCESS_KEY_ID:-${AWS_ACCESS_KEY_ID:-}}" export AWS_SECRET_ACCESS_KEY="${CUOPT_AWS_SECRET_ACCESS_KEY:-${AWS_SECRET_ACCESS_KEY:-}}" unset AWS_SESSION_TOKEN -if [ -z "${CUOPT_DATASET_S3_URI:-}" ]; then - echo "WARNING: CUOPT_DATASET_S3_URI is not set. Skipping nightly aggregation." >&2 - echo "The per-matrix reports (uploaded by individual test jobs) are still available on S3." +if [ -z "${CUOPT_S3_URI:-}" ]; then + echo "WARNING: CUOPT_S3_URI is not set. Skipping nightly aggregation." 
>&2 exit 0 fi -S3_BASE="${CUOPT_DATASET_S3_URI}ci_test_reports/nightly" +S3_BASE="${CUOPT_S3_URI}ci_test_reports/nightly" BRANCH_SLUG=$(echo "${BRANCH}" | tr '/' '-') # Per-matrix summaries are uploaded by test jobs under summaries/{date}/{branch}/. # For production nightlies (main, release/*), RAPIDS_BRANCH matches the branch input. diff --git a/ci/utils/nightly_report_helper.sh b/ci/utils/nightly_report_helper.sh index 0ab568c34d..deb887b441 100755 --- a/ci/utils/nightly_report_helper.sh +++ b/ci/utils/nightly_report_helper.sh @@ -70,8 +70,8 @@ generate_nightly_report() { local s3_summary_uri="" local s3_html_uri="" - if [ -n "${CUOPT_DATASET_S3_URI:-}" ]; then - local s3_base="${CUOPT_DATASET_S3_URI}ci_test_reports/nightly" + if [ -n "${CUOPT_S3_URI:-}" ]; then + local s3_base="${CUOPT_S3_URI}ci_test_reports/nightly" s3_history_uri="${s3_base}/history/${branch_slug}/${test_type}-${matrix_label}.json" s3_summary_uri="${s3_base}/summaries/${run_date}/${branch_slug}/${test_type}-${matrix_label}.json" s3_html_uri="${s3_base}/reports/${run_date}/${branch_slug}/${test_type}-${matrix_label}.html" From 2e8a5ec5153b5dd1a2394fa62e6c2047a8a40bd4 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 16 Apr 2026 15:24:00 -0500 Subject: [PATCH 50/60] Unify S3 access under single CUOPT_S3_URI secret Replace CUOPT_DATASET_S3_URI with CUOPT_S3_URI (bucket root, e.g., s3://cuopt-datasets/). Scripts append the appropriate path prefix: - ci_datasets/routing/ for test datasets - ci_test_reports/nightly/ for reports and dashboards Set the repo secret CUOPT_S3_URI to the bucket root. The old CUOPT_DATASET_S3_URI secret can be removed after this merges. 
--- .github/workflows/pr.yaml | 16 ++++++++-------- .github/workflows/test.yaml | 28 ++++++++++++---------------- ci/nightly_summary.sh | 3 +-- ci/utils/nightly_report_helper.sh | 2 +- datasets/get_test_data.sh | 8 ++++---- 5 files changed, 26 insertions(+), 31 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index a652c23b9a..be67501892 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -299,8 +299,8 @@ jobs: script: ci/test_cpp.sh matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }} secrets: - script-env-secret-1-key: CUOPT_DATASET_S3_URI - script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }} + script-env-secret-1-key: CUOPT_S3_URI + script-env-secret-1-value: ${{ secrets.CUOPT_S3_URI }} script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY @@ -323,8 +323,8 @@ jobs: script: ci/test_python.sh matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }} secrets: - script-env-secret-1-key: CUOPT_DATASET_S3_URI - script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }} + script-env-secret-1-key: CUOPT_S3_URI + script-env-secret-1-value: ${{ secrets.CUOPT_S3_URI }} script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY @@ -384,8 +384,8 @@ jobs: script: ci/test_wheel_cuopt.sh matrix_filter: ${{ needs.compute-matrix-filters.outputs.wheel_lean_filter }} secrets: - script-env-secret-1-key: CUOPT_DATASET_S3_URI - script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }} + script-env-secret-1-key: CUOPT_S3_URI + script-env-secret-1-value: ${{ secrets.CUOPT_S3_URI }} script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY @@ 
-424,8 +424,8 @@ jobs: script: ci/test_wheel_cuopt_server.sh matrix_filter: ${{ needs.compute-matrix-filters.outputs.wheel_lean_filter }} secrets: - script-env-secret-1-key: CUOPT_DATASET_S3_URI - script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }} + script-env-secret-1-key: CUOPT_S3_URI + script-env-secret-1-value: ${{ secrets.CUOPT_S3_URI }} script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index a7bafa5cbd..5246ed0124 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -40,14 +40,13 @@ jobs: sha: ${{ inputs.sha }} script: ci/test_cpp.sh secrets: - script-env-secret-1-key: CUOPT_DATASET_S3_URI - script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }} + script-env-secret-1-key: CUOPT_S3_URI + script-env-secret-1-value: ${{ secrets.CUOPT_S3_URI }} script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} - script-env-secret-4-key: CUOPT_S3_URI - script-env-secret-4-value: ${{ secrets.CUOPT_S3_URI }} + conda-python-tests: if: ${{ !inputs.summary-only }} uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@main @@ -59,14 +58,13 @@ jobs: sha: ${{ inputs.sha }} script: ci/test_python.sh secrets: - script-env-secret-1-key: CUOPT_DATASET_S3_URI - script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }} + script-env-secret-1-key: CUOPT_S3_URI + script-env-secret-1-value: ${{ secrets.CUOPT_S3_URI }} script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} - 
script-env-secret-4-key: CUOPT_S3_URI - script-env-secret-4-value: ${{ secrets.CUOPT_S3_URI }} + wheel-tests-cuopt: if: ${{ !inputs.summary-only }} uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main @@ -77,14 +75,13 @@ jobs: sha: ${{ inputs.sha }} script: ci/test_wheel_cuopt.sh secrets: - script-env-secret-1-key: CUOPT_DATASET_S3_URI - script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }} + script-env-secret-1-key: CUOPT_S3_URI + script-env-secret-1-value: ${{ secrets.CUOPT_S3_URI }} script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} - script-env-secret-4-key: CUOPT_S3_URI - script-env-secret-4-value: ${{ secrets.CUOPT_S3_URI }} + wheel-tests-cuopt-server: if: ${{ !inputs.summary-only }} uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main @@ -95,14 +92,13 @@ jobs: sha: ${{ inputs.sha }} script: ci/test_wheel_cuopt_server.sh secrets: - script-env-secret-1-key: CUOPT_DATASET_S3_URI - script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }} + script-env-secret-1-key: CUOPT_S3_URI + script-env-secret-1-value: ${{ secrets.CUOPT_S3_URI }} script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} - script-env-secret-4-key: CUOPT_S3_URI - script-env-secret-4-value: ${{ secrets.CUOPT_S3_URI }} + conda-notebook-tests: if: ${{ !inputs.summary-only }} secrets: inherit diff --git a/ci/nightly_summary.sh b/ci/nightly_summary.sh index 5cb0b0b639..41790d8b44 100755 --- a/ci/nightly_summary.sh +++ b/ci/nightly_summary.sh @@ -6,8 +6,7 @@ # consolidated Slack notification. Runs as a post-test job after all # matrix CI jobs finish. # -# The script needs S3 access. 
It tries CUOPT_DATASET_S3_URI first, then -# falls back to standard AWS env vars set by aws-actions/configure-aws-credentials. +# The script needs S3 access via CUOPT_S3_URI (bucket root) and CUOPT_AWS_* credentials. # # Optional: # CUOPT_SLACK_WEBHOOK_URL - sends Slack if set diff --git a/ci/utils/nightly_report_helper.sh b/ci/utils/nightly_report_helper.sh index deb887b441..c3b77e6b7a 100755 --- a/ci/utils/nightly_report_helper.sh +++ b/ci/utils/nightly_report_helper.sh @@ -22,7 +22,7 @@ # RAPIDS_CUDA_VERSION - CUDA version (e.g., "12.9") # RAPIDS_PY_VERSION - Python version (e.g., "3.12"), used with --with-python-version # RAPIDS_BRANCH - branch name (e.g., "main") -# CUOPT_DATASET_S3_URI - S3 base URI for reports +# CUOPT_S3_URI - S3 bucket root (e.g., s3://cuopt-datasets/) # GITHUB_SHA - commit SHA # GITHUB_STEP_SUMMARY - path for GitHub Actions step summary diff --git a/datasets/get_test_data.sh b/datasets/get_test_data.sh index 528455e133..472813a003 100755 --- a/datasets/get_test_data.sh +++ b/datasets/get_test_data.sh @@ -8,7 +8,7 @@ set -o pipefail ################################################################################ # S3 Dataset Download Support ################################################################################ -# Set CUOPT_DATASET_S3_URI to base S3 path +# Set CUOPT_S3_URI to S3 bucket root (e.g., s3://cuopt-datasets/) # AWS credentials should be configured via: # - Environment variables (CUOPT_AWS_ACCESS_KEY_ID, CUOPT_AWS_SECRET_ACCESS_KEY) # - Standard AWS variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) @@ -18,8 +18,8 @@ set -o pipefail function try_download_from_s3() { local s3_dirs=("$@") # Array of directories to sync from S3 - if [ -z "${CUOPT_DATASET_S3_URI:-}" ]; then - echo "CUOPT_DATASET_S3_URI not set, skipping S3 download..." + if [ -z "${CUOPT_S3_URI:-}" ]; then + echo "CUOPT_S3_URI not set, skipping S3 download..." 
return 1 fi @@ -35,7 +35,7 @@ function try_download_from_s3() { fi # Append routing subdirectory to base S3 URI - local s3_uri="${CUOPT_DATASET_S3_URI}routing/" + local s3_uri="${CUOPT_S3_URI}ci_datasets/routing/" echo "Downloading datasets from S3..." # Use CUOPT-specific credentials only From 188ef3cf72ac4d1dff4c8ed2d169ba8c6ca21650 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 16 Apr 2026 15:25:24 -0500 Subject: [PATCH 51/60] Update last CUOPT_DATASET_S3_URI reference in developer skill --- skills/cuopt-developer/SKILL.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skills/cuopt-developer/SKILL.md b/skills/cuopt-developer/SKILL.md index 98fe62f19c..66d41c003e 100644 --- a/skills/cuopt-developer/SKILL.md +++ b/skills/cuopt-developer/SKILL.md @@ -296,7 +296,7 @@ rmm::device_uvector data(100, stream); | CUDA out of memory | Reduce problem size | | Slow debug library loading | Device symbols cause delay | -| CI state doesn't persist between runs | CI containers are ephemeral. Never write persistent state to repo files from CI — use S3 (`CUOPT_DATASET_S3_URI`) or artifact stores. Ask: "After this container dies, does tomorrow's run see today's data?" | +| CI state doesn't persist between runs | CI containers are ephemeral. Never write persistent state to repo files from CI — use S3 (`CUOPT_S3_URI`) or artifact stores. Ask: "After this container dies, does tomorrow's run see today's data?" | | CI state transitions go unreported | When CI tracks state over time (e.g. test failures), every transition (new failure, recurring, stabilized) needs an explicit notification path. Ask: "When state X changes to Y, who learns about it and how?" | | Designing CI features without lifecycle check | Before shipping any CI feature that tracks state: (1) Where does state live between runs? (2) What writes/reads it? (3) What happens on state transitions? Verify end-to-end, not just the happy-path logic. 
| | Change applied to only some targets | Before implementing, audit the full scope of what needs the change. For CI: `ls ci/test*.sh`. For APIs: grep all callers. For patterns: find every instance. Enumerate ALL targets first, implement second. | From 46e1ca642f844e568dc57412a0d58407925e10c1 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Fri, 17 Apr 2026 10:52:16 -0500 Subject: [PATCH 52/60] Remove summary-only testing flags from build and test workflows --- .github/workflows/build.yaml | 21 +-------------------- .github/workflows/test.yaml | 9 --------- 2 files changed, 1 insertion(+), 29 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 3ba0edd8c1..96766de4a2 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -37,10 +37,6 @@ on: If 'true', trigger the test workflow after all builds complete. type: boolean default: false - summary-only: - description: "If true, skip all build jobs and run only build-summary" - type: boolean - default: false concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -48,7 +44,6 @@ concurrency: jobs: cpp-build: - if: ${{ !inputs.summary-only }} secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@main with: @@ -58,7 +53,6 @@ jobs: sha: ${{ inputs.sha }} script: ci/build_cpp.sh python-build: - if: ${{ !inputs.summary-only }} needs: [cpp-build] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@main @@ -69,7 +63,6 @@ jobs: sha: ${{ inputs.sha }} script: ci/build_python.sh upload-conda: - if: ${{ !inputs.summary-only }} needs: [cpp-build, python-build] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@main @@ -79,7 +72,6 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} wheel-build-cuopt-mps-parser: - if: ${{ !inputs.summary-only }} secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main with: 
@@ -94,7 +86,6 @@ jobs: # need 1 build per Python version and arch (but CUDA version doesn't matter so choose the latest) matrix_filter: 'group_by([.ARCH, (.PY_VER |split(".") | map(tonumber))])|map(max_by([(.CUDA_VER|split(".")|map(tonumber))]))' wheel-publish-cuopt-mps-parser: - if: ${{ !inputs.summary-only }} needs: wheel-build-cuopt-mps-parser secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main @@ -106,7 +97,6 @@ jobs: package-name: cuopt_mps_parser package-type: python wheel-build-libcuopt: - if: ${{ !inputs.summary-only }} needs: wheel-build-cuopt-mps-parser secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main @@ -120,7 +110,6 @@ jobs: package-type: cpp matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) wheel-publish-libcuopt: - if: ${{ !inputs.summary-only }} needs: wheel-build-libcuopt secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main @@ -132,7 +121,6 @@ jobs: package-name: libcuopt package-type: cpp wheel-build-cuopt: - if: ${{ !inputs.summary-only }} needs: [wheel-build-cuopt-mps-parser, wheel-build-libcuopt] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main @@ -145,7 +133,6 @@ jobs: package-name: cuopt package-type: python wheel-publish-cuopt: - if: ${{ !inputs.summary-only }} needs: wheel-build-cuopt secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main @@ -157,7 +144,6 @@ jobs: package-name: cuopt package-type: python wheel-build-cuopt-server: - if: ${{ !inputs.summary-only }} secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main with: @@ -172,7 +158,6 @@ jobs: # Only need 1 package per CUDA major version. This selects "ARCH=amd64 + the latest supported Python, 1 job per major CUDA version". 
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) wheel-publish-cuopt-server: - if: ${{ !inputs.summary-only }} needs: wheel-build-cuopt-server secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main @@ -184,7 +169,6 @@ jobs: package-name: cuopt_server package-type: python docs-build: - if: ${{ !inputs.summary-only }} needs: [python-build] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main @@ -200,7 +184,6 @@ jobs: container_image: "rapidsai/ci-conda:26.06-latest" script: "ci/build_docs.sh" wheel-build-cuopt-sh-client: - if: ${{ !inputs.summary-only }} secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main with: @@ -216,7 +199,6 @@ jobs: # only need 1 build (noarch package): this selects amd64, oldest-supported Python, latest-supported CUDA matrix_filter: '[map(select(.ARCH == "amd64")) | min_by((.PY_VER | split(".") | map(tonumber)), (.CUDA_VER | split(".") | map(-tonumber)))]' wheel-publish-cuopt-sh-client: - if: ${{ !inputs.summary-only }} needs: wheel-build-cuopt-sh-client secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main @@ -237,7 +219,7 @@ jobs: - wheel-publish-cuopt-server - wheel-publish-cuopt-sh-client - wheel-publish-libcuopt - if: ${{ inputs.trigger-tests && !inputs.summary-only }} + if: inputs.trigger-tests runs-on: ubuntu-latest # ref: https://docs.github.com/en/actions/reference/security/secure-use#use-an-intermediate-environment-variable env: @@ -288,7 +270,6 @@ jobs: run: bash ci/build_summary.sh build-images: - if: ${{ !inputs.summary-only }} needs: - wheel-publish-cuopt - wheel-publish-cuopt-server diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 5246ed0124..60f56b0c95 100644 --- a/.github/workflows/test.yaml +++ 
b/.github/workflows/test.yaml @@ -24,14 +24,9 @@ on: description: "build_type: one of [branch, nightly, pull-request]" type: string default: nightly - summary-only: - description: "If true, skip all test jobs and run only nightly-summary" - type: boolean - default: false jobs: conda-cpp-tests: - if: ${{ !inputs.summary-only }} uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@main with: build_type: ${{ inputs.build_type }} @@ -48,7 +43,6 @@ jobs: script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} conda-python-tests: - if: ${{ !inputs.summary-only }} uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@main with: run_codecov: false @@ -66,7 +60,6 @@ jobs: script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} wheel-tests-cuopt: - if: ${{ !inputs.summary-only }} uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main with: build_type: ${{ inputs.build_type }} @@ -83,7 +76,6 @@ jobs: script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} wheel-tests-cuopt-server: - if: ${{ !inputs.summary-only }} uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main with: build_type: ${{ inputs.build_type }} @@ -100,7 +92,6 @@ jobs: script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} conda-notebook-tests: - if: ${{ !inputs.summary-only }} secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main with: From 31b12a11e11c8557efc04098ca63b1cbbc788184 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Fri, 17 Apr 2026 11:01:39 -0500 Subject: [PATCH 53/60] Add SPDX license header to dashboard HTML --- ci/dashboard/index.html | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ci/dashboard/index.html b/ci/dashboard/index.html index 73329dea0c..a1ae36b9cb 100644 --- a/ci/dashboard/index.html +++ b/ci/dashboard/index.html @@ -1,3 +1,7 @@ + From 893029149a6abbc9dd3fa77663fe4ec01d583b4a Mon Sep 17 00:00:00 2001 From: 
Ramakrishna Prabhu Date: Fri, 17 Apr 2026 11:27:31 -0500 Subject: [PATCH 54/60] Improve nightly CI reporting UX across Slack, dashboard, and HTML - Increase error message truncation to 150 chars for new failures in Slack thread replies (recurring failures stay short) - Append failing matrix labels to per-workflow summary in main Slack message (up to 3, then "+N more") - Add per-job "View Logs" links in Slack thread workflow sections - Add "Flake Rate" column and rename "Retries" to "Flake Count" in dashboard flaky tests table - Add clickable "View Logs" links for failed jobs in build summary - Make test names in HTML report failure tables link to source on GitHub when suite looks like a file path and sha is available --- ci/build_summary.sh | 4 +++ ci/dashboard/index.html | 20 ++++++++++++++- ci/utils/aggregate_nightly.py | 22 ++++++++++++++-- ci/utils/send_consolidated_summary.sh | 37 +++++++++++++++++++++++---- 4 files changed, 75 insertions(+), 8 deletions(-) diff --git a/ci/build_summary.sh b/ci/build_summary.sh index e8fd81a436..4501fbd152 100755 --- a/ci/build_summary.sh +++ b/ci/build_summary.sh @@ -88,6 +88,10 @@ for group_name, group_jobs in sorted(groups.items()): if g_failed > 0: icon = ':x:' detail = f'{g_failed}/{g_total} failed' + # Add clickable log links for failed jobs + failed_in_group = [j for j in group_jobs if j.get('conclusion') == 'failure'] + if failed_in_group and failed_in_group[0].get('html_url'): + detail += f' <{failed_in_group[0]["html_url"]}|View Logs>' elif g_passed == g_total: icon = ':white_check_mark:' detail = f'{g_total} passed' diff --git a/ci/dashboard/index.html b/ci/dashboard/index.html index a1ae36b9cb..a36a9da0d7 100644 --- a/ci/dashboard/index.html +++ b/ci/dashboard/index.html @@ -457,14 +457,32 @@

cuOpt Nightly

return '

No flaky tests matching current filters.

'; let html = '

Flaky Tests (passed on retry)

'; - html += ''; + html += ''; for (const e of items) { + // Compute flake rate from index data if available (flaky / total across runs) + let flakeRate = '—'; + if (S.index && S.index.dates) { + const currentBranch = S.current?.branch || 'main'; + const dateEntries = Object.values(S.index.dates) + .filter(v => (v.branch || 'main') === currentBranch); + const totalRuns = dateEntries.length; + if (totalRuns > 0) { + const totalFlaky = dateEntries.reduce((sum, v) => + sum + ((v.test_totals || {}).flaky || 0), 0); + const totalTests = dateEntries.reduce((sum, v) => + sum + ((v.test_totals || {}).total || 0), 0); + if (totalTests > 0) { + flakeRate = (totalFlaky / totalTests * 100).toFixed(2) + '%'; + } + } + } html += ` + `; } html += '
Test TypeMatrixSuiteTestRetries
Test TypeMatrixSuiteTestFlake CountFlake Rate
${esc(e.test_type||'')} ${esc(e.matrix_label||'')} ${esc(e.suite)} ${esc(e.name)} FLAKY ${e.retry_count||'?'}${flakeRate}
'; diff --git a/ci/utils/aggregate_nightly.py b/ci/utils/aggregate_nightly.py index 04989a4846..71d7579de9 100644 --- a/ci/utils/aggregate_nightly.py +++ b/ci/utils/aggregate_nightly.py @@ -401,6 +401,24 @@ def generate_consolidated_html(
{totals["resolved"]}
Stabilized
""") + # Helper: build a GitHub source link for test names when suite looks like a file path + def _test_name_html(entry): + """Return HTML for the test name, linked to source if suite looks like a file path.""" + name_escaped = _html_escape(entry['name']) + suite = entry.get('suite', '') + # Find the sha from the matching grid entry + sha = "unknown" + for g in agg["matrix_grid"]: + if (g["test_type"] == entry.get("test_type") + and g["matrix_label"] == entry.get("matrix_label") + and g.get("sha")): + sha = g["sha"] + break + if sha != "unknown" and suite and ('/' in suite or suite.endswith('.py')): + url = f"https://github.com/NVIDIA/cuopt/blob/{_html_escape(sha)}/{_html_escape(suite)}" + return f'{name_escaped}' + return f"{name_escaped}" + # --- New failures --- if agg["all_new_failures"]: parts.append("

New Failures

") @@ -415,7 +433,7 @@ def generate_consolidated_html( f"" f"" f"" - f"" + f"" f"' ) @@ -435,7 +453,7 @@ def generate_consolidated_html( f"" f"" f"" - f"" + f"" f"" f"' diff --git a/ci/utils/send_consolidated_summary.sh b/ci/utils/send_consolidated_summary.sh index 195a7d5797..199f19fc3d 100755 --- a/ci/utils/send_consolidated_summary.sh +++ b/ci/utils/send_consolidated_summary.sh @@ -159,16 +159,31 @@ for j in workflow_jobs: if j["conclusion"] == "failure": wf_counts[prefix]["failed"] += 1 +# Build a lookup: workflow prefix -> list of failing matrix_labels from grid +wf_failing_labels = {} +for g in grid: + if g["status"].startswith("failed"): + wf_failing_labels.setdefault(g["test_type"], []).append(g["matrix_label"]) + if failing_workflows: lines = [] for wf in sorted(failing_workflows): counts = wf_counts.get(wf, {}) f_count = counts.get("failed", 0) t_count = counts.get("total", 0) + # Append failing matrix labels (up to 3, then "+N more") + labels = wf_failing_labels.get(wf, []) + label_suffix = "" + if labels: + shown = labels[:3] + label_suffix = " (" + ", ".join(shown) + if len(labels) > 3: + label_suffix += f", +{len(labels) - 3} more" + label_suffix += ")" if t_count > 0: - lines.append(f":x: *{wf}* — {f_count}/{t_count} failed") + lines.append(f":x: *{wf}* — {f_count}/{t_count} failed{label_suffix}") else: - lines.append(f":x: *{wf}* — failed") + lines.append(f":x: *{wf}* — failed{label_suffix}") blocks.append({"type": "divider"}) blocks.append({ "type": "section", @@ -228,13 +243,13 @@ if issues_by_wf: wf_blocks = [] wf_text = f"*{wf_name}*\n" - # New failures + # New failures (show more error context — 150 chars) for f_entry in issues["new"][:10]: - msg = f_entry.get("message", "")[:60].replace("\n", " ") + msg = f_entry.get("message", "")[:150].replace("\n", " ") matrix = f_entry.get("matrix_label", "") wf_text += f":new: `{f_entry['name']}` ({matrix}) — {msg}\n" - # Recurring failures + # Recurring failures (shorter — just show since date) for 
f_entry in issues["recurring"][:10]: matrix = f_entry.get("matrix_label", "") first = f_entry.get("first_seen", "?") @@ -257,6 +272,18 @@ if issues_by_wf: if len(issues[category]) > limit: wf_text += f"_...+{len(issues[category]) - limit} more {label}_\n" + # Per-job log links: find workflow_jobs matching this workflow prefix + job_urls = [j["url"] for j in workflow_jobs + if j.get("url") and j["name"].split(" / ")[0] == wf_name + and j["conclusion"] == "failure"] + if not job_urls: + # Also try matching by test_type prefix for tracked jobs + job_urls = [j["url"] for j in workflow_jobs + if j.get("url") and j["name"].startswith(wf_name) + and j["conclusion"] == "failure"] + if job_urls: + wf_text += f"<{job_urls[0]}|:link: View Logs>\n" + # Chunk if needed while wf_text: chunk = wf_text[:2900] From 53db6009ea87d4fb00e11dda778017be9982efbe Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Fri, 17 Apr 2026 11:33:07 -0500 Subject: [PATCH 55/60] Ping user on new failures and reorder: new > flaky > recurring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Mention @rgsl888prabhu in Slack only when there are NEW failures (not for recurring or flaky-only runs) - Reorder everywhere: new failures first, then flaky, then recurring, then resolved — consistent across Slack thread, HTML report, and dashboard tabs --- ci/dashboard/index.html | 2 +- ci/utils/aggregate_nightly.py | 34 +++++++++++++-------------- ci/utils/send_consolidated_summary.sh | 18 +++++++------- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/ci/dashboard/index.html b/ci/dashboard/index.html index a36a9da0d7..2cdd7406ce 100644 --- a/ci/dashboard/index.html +++ b/ci/dashboard/index.html @@ -194,9 +194,9 @@

cuOpt Nightly

- +
diff --git a/ci/utils/aggregate_nightly.py b/ci/utils/aggregate_nightly.py index 71d7579de9..17cea4dec7 100644 --- a/ci/utils/aggregate_nightly.py +++ b/ci/utils/aggregate_nightly.py @@ -439,6 +439,23 @@ def _test_name_html(entry): ) parts.append("
{_html_escape(e['test_type'])}{_html_escape(e['matrix_label'])}{_html_escape(e['suite'])}{_html_escape(e['name'])}{_test_name_html(e)}
{short}" f'
{msg}
{_html_escape(e['test_type'])}{_html_escape(e['matrix_label'])}{_html_escape(e['suite'])}{_html_escape(e['name'])}{_test_name_html(e)}{_html_escape(e.get('first_seen', '?'))}
{short}" f'
{msg}
") + # --- Flaky --- + if agg["all_flaky_tests"]: + parts.append("

Flaky Tests

") + parts.append( + "" + "" + ) + for e in agg["all_flaky_tests"]: + parts.append( + f"" + f"" + f"" + f"" + f"" + ) + parts.append("
Test TypeMatrixSuiteTestRetries
{_html_escape(e['test_type'])}{_html_escape(e['matrix_label'])}{_html_escape(e['suite'])}{_html_escape(e['name'])}{e.get('retry_count', '?')}
") + # --- Recurring failures --- if agg["all_recurring_failures"]: parts.append("

Recurring Failures

") @@ -478,23 +495,6 @@ def _test_name_html(entry): ) parts.append("
") - # --- Flaky --- - if agg["all_flaky_tests"]: - parts.append("

Flaky Tests

") - parts.append( - "" - "" - ) - for e in agg["all_flaky_tests"]: - parts.append( - f"" - f"" - f"" - f"" - f"" - ) - parts.append("
Test TypeMatrixSuiteTestRetries
{_html_escape(e['test_type'])}{_html_escape(e['matrix_label'])}{_html_escape(e['suite'])}{_html_escape(e['name'])}{e.get('retry_count', '?')}
") - if ( not agg["all_new_failures"] and not agg["all_recurring_failures"] diff --git a/ci/utils/send_consolidated_summary.sh b/ci/utils/send_consolidated_summary.sh index 199f19fc3d..02e332e13d 100755 --- a/ci/utils/send_consolidated_summary.sh +++ b/ci/utils/send_consolidated_summary.sh @@ -107,8 +107,8 @@ untracked_count = len(untracked_failed) if has_failures and (has_new or untracked_count > 0): emoji = ":rotating_light:" - text = f"{len(failing_workflows)} workflow(s) with failures" - mention = "" + text = f"{len(failing_workflows)} workflow(s) with NEW failures" + mention = "<@rgsl888prabhu> " elif has_failures: emoji = ":x:" text = f"Recurring failures in {len(failing_workflows)} workflow(s)" @@ -243,23 +243,23 @@ if issues_by_wf: wf_blocks = [] wf_text = f"*{wf_name}*\n" - # New failures (show more error context — 150 chars) + # New failures first (most urgent, show more error context) for f_entry in issues["new"][:10]: msg = f_entry.get("message", "")[:150].replace("\n", " ") matrix = f_entry.get("matrix_label", "") wf_text += f":new: `{f_entry['name']}` ({matrix}) — {msg}\n" - # Recurring failures (shorter — just show since date) + # Flaky (actionable — tests that are unstable) + for f_entry in issues["flaky"][:10]: + matrix = f_entry.get("matrix_label", "") + wf_text += f":warning: `{f_entry['name']}` ({matrix})\n" + + # Recurring failures (known issues) for f_entry in issues["recurring"][:10]: matrix = f_entry.get("matrix_label", "") first = f_entry.get("first_seen", "?") wf_text += f":repeat: `{f_entry['name']}` ({matrix}) — since {first}\n" - # Flaky - for f_entry in issues["flaky"][:10]: - matrix = f_entry.get("matrix_label", "") - wf_text += f":warning: `{f_entry['name']}` ({matrix})\n" - # Resolved for r in issues["resolved"][:5]: matrix = r.get("matrix_label", "") From 9c3cc99ff3cc71e63010a0cfd9ff9b873453a3ff Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Fri, 17 Apr 2026 11:58:43 -0500 Subject: [PATCH 56/60] Show useful error details 
in HTML report summaries Extract the last line of error messages (usually the assertion or exception) instead of the first line (usually the test method signature). Increases visible summary to 200 chars. Full error still available in expandable details. --- ci/utils/aggregate_nightly.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/ci/utils/aggregate_nightly.py b/ci/utils/aggregate_nightly.py index 17cea4dec7..f515f36acd 100644 --- a/ci/utils/aggregate_nightly.py +++ b/ci/utils/aggregate_nightly.py @@ -419,6 +419,25 @@ def _test_name_html(entry): return f'{name_escaped}' return f"{name_escaped}" + def _error_summary(message, max_len=200): + """Extract the most useful part of an error message for display. + Prefers the last line (usually the assertion) over the first + (usually the test method signature).""" + if not message: + return "" + lines = [l.strip() for l in message.strip().splitlines() if l.strip()] + # Use the last non-empty line (typically the assertion/error) + if lines: + summary = lines[-1] + # If the last line is very short, include the previous line too + if len(summary) < 40 and len(lines) > 1: + summary = lines[-2] + " — " + summary + else: + summary = message + if len(summary) > max_len: + summary = summary[:max_len] + "..." + return summary + # --- New failures --- if agg["all_new_failures"]: parts.append("

New Failures

") @@ -428,7 +447,7 @@ def _test_name_html(entry): ) for e in agg["all_new_failures"]: msg = _html_escape(e.get("message", "")) - short = _html_escape(e.get("message", "")[:100]) + short = _html_escape(_error_summary(e.get("message", ""))) parts.append( f"" f"" @@ -465,7 +484,7 @@ def _test_name_html(entry): ) for e in agg["all_recurring_failures"]: msg = _html_escape(e.get("message", "")) - short = _html_escape(e.get("message", "")[:100]) + short = _html_escape(_error_summary(e.get("message", ""))) parts.append( f"" f"" From 1d064d741143e23d774f0473edaef941c7f33ea3 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Fri, 17 Apr 2026 12:01:17 -0500 Subject: [PATCH 57/60] Chunk all Slack message blocks to respect 3000-char limit Add chunking to per-workflow failure block in main message, per-matrix summary sections (failures, stabilized, flaky), and build summary job list. Prevents invalid_blocks errors when many workflows or tests fail simultaneously. --- ci/build_summary.sh | 15 +++++++------ ci/utils/send_consolidated_summary.sh | 19 ++++++++++++---- ci/utils/send_nightly_summary.sh | 32 +++++++++++++-------------- 3 files changed, 38 insertions(+), 28 deletions(-) diff --git a/ci/build_summary.sh b/ci/build_summary.sh index 4501fbd152..f61a402a66 100755 --- a/ci/build_summary.sh +++ b/ci/build_summary.sh @@ -100,13 +100,14 @@ for group_name, group_jobs in sorted(groups.items()): detail = f'{g_passed}/{g_total} passed' lines.append(f'{icon} *{group_name}* \u2014 {detail}') -text = '\n'.join(lines) -if len(text) > 2900: - text = text[:2900] + '\n_...truncated_' -blocks.append({ - 'type': 'section', - 'text': {'type': 'mrkdwn', 'text': text}, -}) +current = '' +for line in lines: + if current and len(current) + len(line) + 1 > 2900: + blocks.append({'type': 'section', 'text': {'type': 'mrkdwn', 'text': current.rstrip()}}) + current = '' + current += line + '\n' +if current.strip(): + blocks.append({'type': 'section', 'text': {'type': 'mrkdwn', 'text': 
current.rstrip()}}) # Link if run_url: diff --git a/ci/utils/send_consolidated_summary.sh b/ci/utils/send_consolidated_summary.sh index 02e332e13d..a568d237ab 100755 --- a/ci/utils/send_consolidated_summary.sh +++ b/ci/utils/send_consolidated_summary.sh @@ -185,10 +185,21 @@ if failing_workflows: else: lines.append(f":x: *{wf}* — failed{label_suffix}") blocks.append({"type": "divider"}) - blocks.append({ - "type": "section", - "text": {"type": "mrkdwn", "text": "\n".join(lines)}, - }) + # Chunk to stay within Slack's 3000-char block limit + current = "" + for line in lines: + if current and len(current) + len(line) + 1 > 2900: + blocks.append({ + "type": "section", + "text": {"type": "mrkdwn", "text": current.rstrip()}, + }) + current = "" + current += line + "\n" + if current.strip(): + blocks.append({ + "type": "section", + "text": {"type": "mrkdwn", "text": current.rstrip()}, + }) # Links in main message link_parts = [] diff --git a/ci/utils/send_nightly_summary.sh b/ci/utils/send_nightly_summary.sh index 7b39a02cec..63c742a2ca 100755 --- a/ci/utils/send_nightly_summary.sh +++ b/ci/utils/send_nightly_summary.sh @@ -92,20 +92,27 @@ blocks.append({ blocks.append({"type": "divider"}) +def chunk_lines_to_blocks(header, lines, blocks, limit=2900): + """Add lines as section blocks, chunking to stay under Slack's char limit.""" + current = f"*{header}*\n" + for line in lines: + if len(current) + len(line) + 1 > limit: + blocks.append({"type": "section", "text": {"type": "mrkdwn", "text": current.rstrip()}}) + current = "" + current += line + "\n" + if current.strip(): + blocks.append({"type": "section", "text": {"type": "mrkdwn", "text": current.rstrip()}}) + # --- Genuine failures --- if failed > 0: lines = [] for f_entry in d.get("new_failures", []): - msg = f_entry.get("message", "")[:60].replace("\n", " ") + msg = f_entry.get("message", "")[:150].replace("\n", " ") lines.append(f" :new: `{f_entry['name']}` ({f_entry['suite']}) \u2014 {msg}") for f_entry in 
d.get("recurring_failures", []): - msg = f_entry.get("message", "")[:60].replace("\n", " ") first = f_entry.get("first_seen", "?") lines.append(f" :repeat: `{f_entry['name']}` ({f_entry['suite']}) \u2014 since {first}") - blocks.append({ - "type": "section", - "text": {"type": "mrkdwn", "text": "*Genuine Failures:*\n" + "\n".join(lines)}, - }) + chunk_lines_to_blocks("Genuine Failures:", lines, blocks) # --- Stabilized tests --- resolved_list = d.get("resolved_tests", []) @@ -119,13 +126,7 @@ if resolved_list: f" :white_check_mark: `{r['name']}` ({r['suite']}) \u2014 " f"failing since {since}, failed {count}x{flaky_tag}" ) - blocks.append({ - "type": "section", - "text": { - "type": "mrkdwn", - "text": "*Stabilized (were failing, now pass):*\n" + "\n".join(lines), - }, - }) + chunk_lines_to_blocks("Stabilized (were failing, now pass):", lines, blocks) # --- Flaky tests --- flaky_list = d.get("flaky_tests", []) @@ -134,10 +135,7 @@ if flaky_list: for f_entry in flaky_list: retries = f_entry.get("retry_count", "?") lines.append(f" :warning: `{f_entry['name']}` ({f_entry['suite']}) \u2014 {retries} retries") - blocks.append({ - "type": "section", - "text": {"type": "mrkdwn", "text": "*Flaky Tests (passed on retry):*\n" + "\n".join(lines)}, - }) + chunk_lines_to_blocks("Flaky Tests (passed on retry):", lines, blocks) # --- Links --- link_parts = [] From 4430650c5fc94bb243c752d0adb95f2f204e6878 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Fri, 17 Apr 2026 13:32:59 -0500 Subject: [PATCH 58/60] Keep error assertion in JUnit XML message extraction The traceback in JUnit XML failure elements starts with the test method signature and ends with the actual assertion/error. Taking the first 500 chars often cut off before the useful part. Now keeps the first line (context) plus the last 500 chars (where the assertion lives). 
--- ci/utils/nightly_report.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/ci/utils/nightly_report.py b/ci/utils/nightly_report.py index 2bd23b1f18..b836d82694 100755 --- a/ci/utils/nightly_report.py +++ b/ci/utils/nightly_report.py @@ -98,12 +98,28 @@ def parse_junit_xml(xml_path): status = "failed" message = failure.get("message", "") if failure.text: - message = failure.text[:500] + # Keep the last 500 chars (where the assertion/error is) + # plus the first line for context + text = failure.text.strip() + lines = text.splitlines() + first_line = lines[0] if lines else "" + last_chunk = text[-500:] if len(text) > 500 else text + if len(text) > 500: + message = first_line + "\n...\n" + last_chunk + else: + message = text elif error is not None: status = "error" message = error.get("message", "") if error.text: - message = error.text[:500] + text = error.text.strip() + lines = text.splitlines() + first_line = lines[0] if lines else "" + last_chunk = text[-500:] if len(text) > 500 else text + if len(text) > 500: + message = first_line + "\n...\n" + last_chunk + else: + message = text else: status = "passed" message = "" From f589b76a60715141609ff11be4a080d1717d83a6 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Fri, 17 Apr 2026 13:42:14 -0500 Subject: [PATCH 59/60] Add self-maintaining failed job links in Slack thread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of mapping test_type to GitHub job names (fragile), add a separate thread reply listing all failed jobs with direct clickable links to their GitHub Actions logs. Self-maintaining — any new workflow automatically appears. 
--- ci/utils/send_consolidated_summary.sh | 33 +++++++++++++++++---------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/ci/utils/send_consolidated_summary.sh b/ci/utils/send_consolidated_summary.sh index a568d237ab..88f238899b 100755 --- a/ci/utils/send_consolidated_summary.sh +++ b/ci/utils/send_consolidated_summary.sh @@ -283,18 +283,6 @@ if issues_by_wf: if len(issues[category]) > limit: wf_text += f"_...+{len(issues[category]) - limit} more {label}_\n" - # Per-job log links: find workflow_jobs matching this workflow prefix - job_urls = [j["url"] for j in workflow_jobs - if j.get("url") and j["name"].split(" / ")[0] == wf_name - and j["conclusion"] == "failure"] - if not job_urls: - # Also try matching by test_type prefix for tracked jobs - job_urls = [j["url"] for j in workflow_jobs - if j.get("url") and j["name"].startswith(wf_name) - and j["conclusion"] == "failure"] - if job_urls: - wf_text += f"<{job_urls[0]}|:link: View Logs>\n" - # Chunk if needed while wf_text: chunk = wf_text[:2900] @@ -306,6 +294,27 @@ if issues_by_wf: print(make_payload(wf_blocks)) +# ── Thread: Failed job log links ────────────────────────────────────── +failed_job_links = [j for j in workflow_jobs if j["conclusion"] == "failure" and j.get("url")] +if failed_job_links: + link_blocks = [] + current = "*Failed Job Logs:*\n" + for j in failed_job_links: + line = f":x: <{j['url']}|{j['name']}>\n" + if len(current) + len(line) > 2900: + link_blocks.append({ + "type": "section", + "text": {"type": "mrkdwn", "text": current.rstrip()}, + }) + current = "" + current += line + if current.strip(): + link_blocks.append({ + "type": "section", + "text": {"type": "mrkdwn", "text": current.rstrip()}, + }) + print(make_payload(link_blocks)) + PYEOF ) From 2e49363daf87b45483ab60945c7f26d35bc193ed Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Fri, 17 Apr 2026 15:39:34 -0500 Subject: [PATCH 60/60] update --- ci/utils/nightly_report.py | 24 ++++-------------------- 1 file 
changed, 4 insertions(+), 20 deletions(-) diff --git a/ci/utils/nightly_report.py b/ci/utils/nightly_report.py index b836d82694..55a39d89bf 100755 --- a/ci/utils/nightly_report.py +++ b/ci/utils/nightly_report.py @@ -98,28 +98,12 @@ def parse_junit_xml(xml_path): status = "failed" message = failure.get("message", "") if failure.text: - # Keep the last 500 chars (where the assertion/error is) - # plus the first line for context - text = failure.text.strip() - lines = text.splitlines() - first_line = lines[0] if lines else "" - last_chunk = text[-500:] if len(text) > 500 else text - if len(text) > 500: - message = first_line + "\n...\n" + last_chunk - else: - message = text + message = failure.text.strip() elif error is not None: status = "error" message = error.get("message", "") if error.text: - text = error.text.strip() - lines = text.splitlines() - first_line = lines[0] if lines else "" - last_chunk = text[-500:] if len(text) > 500 else text - if len(text) > 500: - message = first_line + "\n...\n" + last_chunk - else: - message = text + message = error.text.strip() else: status = "passed" message = "" @@ -560,7 +544,7 @@ def generate_json_summary( "suite": e["suite"], "name": e["name"], "classname": e["classname"], - "message": e.get("message", "")[:200], + "message": e.get("message", ""), } for e in new_failures ], @@ -570,7 +554,7 @@ def generate_json_summary( "name": e["name"], "classname": e["classname"], "first_seen": e.get("first_seen", "unknown"), - "message": e.get("message", "")[:200], + "message": e.get("message", ""), } for e in recurring_failures ],
{_html_escape(e['test_type'])}{_html_escape(e['matrix_label'])}
{_html_escape(e['test_type'])}{_html_escape(e['matrix_label'])}