diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index d9b619f9559..02dcf273d68 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1,43 +1,4 @@
-megatron/core/ @NVIDIA/core-adlr @NVIDIA/core-nemo
-
-megatron/core/models/gpt/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/gpt
-
-megatron/core/models/multimodal/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/multi-modal
-
-megatron/core/models/mamba/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/hybrid-mamba
-megatron/core/ssm/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/hybrid-mamba
-
-megatron/core/datasets/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/datasets
-
-megatron/core/tokenizers/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/tokenizers
-
-megatron/core/distributed/fsdp/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/megatron-fsdp
-
-megatron/core/transformer/fsdp_dtensor_checkpoint.py @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/megatron-fsdp
-
-megatron/core/dist_checkpointing/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/dist-checkpointing
-
-megatron/core/optimizer/distrib_optimizer/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/dist-optimizer
-
-megatron/core/inference/modelopt_support @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/quantization-and-inference
-
-megatron/core/datasets/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/datasets
-
-megatron/core/pipeline_parallel/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/pipeline-parallelism
-
-megatron/core/transformer/ @NVIDIA/core-adlr @NVIDIA/core-nemo
-
-megatron/core/transformer/moe/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/mixture-of-experts-adlr @NVIDIA/mixture-of-experts-devtech
-
-megatron/core/inference/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/inference
-
-megatron/core/parallel_state.py @NVIDIA/core-adlr @NVIDIA/core-nemo
-
-megatron/core/post_training/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/post-training
-
-megatron/post_training/ @NVIDIA/post-training
-
-megatron/core/transformer/cuda_graphs.py @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/cuda-graphs
+* @NVIDIA/core-nemo @NVIDIA/core-devtech
megatron/training/ @NVIDIA/training-adlr @NVIDIA/training-nemo
megatron/training/arguments.py
@@ -46,19 +7,9 @@ megatron/training/arguments.py
.github/ @NVIDIA/ci
.gitlab-ci.yml @NVIDIA/ci
docker/ @NVIDIA/ci
+tests/unit_tests/run_ci_test.sh @NVIDIA/ci
+tests/test_utils/python_scripts/
tests/functional_tests/python_test_utils/ @NVIDIA/ci
tests/functional_tests/shell_test_utils/ @NVIDIA/ci
-tests/test_utils/recipes/ @NVIDIA/ci
-tests/unit_tests/run_ci_test.sh @NVIDIA/ci
-
-# API Backwards Compatibility Check
-scripts/check_api_backwards_compatibility.py @NVIDIA/ci
-scripts/README_API_COMPAT.md @NVIDIA/ci
-.github/workflows/check_api_backwards_compatibility_workflow.yml @NVIDIA/ci
-docs/api-backwards-compatibility-check.md @NVIDIA/ci
-tests/unit_tests/test_api_backwards_compat_setup.py @NVIDIA/ci
-
-megatron/rl/ @NVIDIA/reinforcement-learning
-examples/rl/ @NVIDIA/reinforcement-learning
-test/unit_tests/test_rl_utils.py @NVIDIA/reinforcement-learning
-train_rl.py @NVIDIA/reinforcement-learning
+pyproject.toml @NVIDIA/ci
+uv.lock @NVIDIA/ci
diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml
index 0d2d5b9577e..a3cfb5f848a 100644
--- a/.github/workflows/cicd-main.yml
+++ b/.github/workflows/cicd-main.yml
@@ -78,8 +78,8 @@ jobs:
IS_MERGE_GROUP: ${{ github.event_name == 'merge_group' }}
SCHEDULED_JOB: ${{ github.event_name == 'schedule' }}
run: |
- # Skip SSO check for scheduled jobs, main branch, or merge groups
- if [ "${{ env.SCHEDULED_JOB }}" == "true" ] || [ "${IS_MAIN_BRANCH}" == "true" ] || [ "${IS_MERGE_GROUP}" == "true" ]; then
+ # Skip SSO check for scheduled jobs, main branch, dev branch, or merge groups
+ if [ "${{ env.SCHEDULED_JOB }}" == "true" ] || [ "${IS_MAIN_BRANCH}" == "true" ] || [ "${IS_DEV_BRANCH}" == "true" ] || [ "${IS_MERGE_GROUP}" == "true" ]; then
echo "is_maintainer=true" | tee -a $GITHUB_OUTPUT
exit 0
fi
diff --git a/.github/workflows/mirror-to-main.yml b/.github/workflows/mirror-to-main.yml
new file mode 100644
index 00000000000..cb77851942b
--- /dev/null
+++ b/.github/workflows/mirror-to-main.yml
@@ -0,0 +1,129 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+name: Mirror Dev to Main
+
+on:
+ push:
+ branches:
+ - "pull-request/[0-9]+"
+
+jobs:
+ cherry-pick-to-main:
+ runs-on: ubuntu-latest
+ permissions:
+ contents: write
+ pull-requests: write
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ token: ${{ secrets.PAT }}
+
+ - name: Get PR info
+ id: get-pr-info
+ uses: nv-gha-runners/get-pr-info@main
+
+ - name: Configure Git
+ run: |
+ git config --global user.email "github-actions[bot]@users.noreply.github.com"
+ git config --global user.name "GitHub Actions Bot"
+
+ - name: Cherry-pick to main
+ env:
+ GH_TOKEN: ${{ secrets.PAT }}
+ run: |
+ set -x
+
+ PR_NUMBER=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }}
+ BASE_REF="${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.ref }}"
+ HAS_MIRROR_MAIN_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. == "mirror-to-main")' || echo "false")
+ TARGET_BRANCH="cherry-pick-$PR_NUMBER-into-main"
+
+ # Skip if not labeled with mirror-to-main
+ if [ "$HAS_MIRROR_MAIN_LABEL" != "true" ]; then
+ echo "PR is not labeled with mirror-to-main, will not mirror to main."
+ exit 0
+ fi
+
+ # Skip if not targeting dev
+ if [ "$BASE_REF" != "dev" ]; then
+ echo "PR is not targeting dev, will not mirror to main."
+ exit 0
+ fi
+
+ # Check if target branch already exists
+ if git ls-remote --heads origin "refs/heads/$TARGET_BRANCH" | grep -q .; then
+ echo "Target branch already exists, will not cherry-pick again."
+ exit 0
+ fi
+
+ # Get PR details
+ PR_AUTHOR="${{ fromJSON(steps.get-pr-info.outputs.pr-info).user.login }}"
+ PR_TITLE="${{ fromJSON(steps.get-pr-info.outputs.pr-info).title }}"
+ SOURCE_BRANCH="${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.ref }}"
+ SOURCE_REPO="${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.repo.full_name }}"
+
+ # Fetch all branches
+ git fetch origin dev
+
+ # Handle forks vs same repo
+ if [ "$SOURCE_REPO" = "${{ github.repository }}" ]; then
+ git fetch origin "$SOURCE_BRANCH"
+ git checkout "$SOURCE_BRANCH"
+ else
+ git fetch "https://github.com/$SOURCE_REPO.git" "$SOURCE_BRANCH"
+ git checkout FETCH_HEAD
+ fi
+
+ # Find commit range to cherry-pick
+ START_COMMIT=$(git merge-base origin/dev HEAD)
+ END_COMMIT=$(git rev-parse HEAD)
+
+ # Create cherry-pick branch from main
+ git fetch origin main
+ git checkout main
+ git checkout -b "$TARGET_BRANCH"
+
+ # Cherry-pick commits
+ if ! git cherry-pick "$START_COMMIT..$END_COMMIT"; then
+ # Comment on the original PR about the failure
+ COMMENT_BODY=$(cat <<'EOF'
+          ❌ **Cherry-pick to main failed**
+
+ The cherry-pick encountered conflicts and could not be completed automatically.
+
+ **Next steps:**
+ 1. Manually create a PR with these changes to main
+ 2. Resolve any conflicts
+ EOF
+ )
+
+ gh pr comment $PR_NUMBER --body "$COMMENT_BODY"
+ exit 1
+ fi
+
+ # Push branch
+ git push -u origin "$TARGET_BRANCH"
+
+ # Create PR to main
+ gh pr create \
+ --base main \
+ --head "$TARGET_BRANCH" \
+            --title "cp: \`$PR_TITLE (#$PR_NUMBER)\` into \`main\`" \
+            --body "[🤖]: Hi @$PR_AUTHOR 👋
+          We've cherry-picked \`$PR_TITLE (#$PR_NUMBER)\` into \`main\` for you! 🎉
+          Please review and approve this cherry-pick at your convenience!" \
+ --label "cherry-pick" \
+ --reviewer "$PR_AUTHOR"
+
diff --git a/.github/workflows/multi-approval-bot.yml b/.github/workflows/multi-approval-bot.yml
deleted file mode 100644
index c7477679201..00000000000
--- a/.github/workflows/multi-approval-bot.yml
+++ /dev/null
@@ -1,74 +0,0 @@
-name: "Codeowners Approval Workflow"
-
-on:
- push:
- branches:
- - "pull-request/[0-9]+"
- merge_group:
- types: [checks_requested]
-
-jobs:
- pre-flight:
- uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.73.2
- if: github.repository == 'NVIDIA/Megatron-LM'
-
- codeowners-approval:
- needs: [pre-flight]
- runs-on: ubuntu-latest
- if: |
- !(needs.pre-flight.outputs.docs_only == 'true'
- || needs.pre-flight.outputs.is_merge_group == 'true'
- || needs.pre-flight.outputs.is_deployment_workflow == 'true')
- steps:
- - name: Get PR info
- id: get-pr-info
- if: startsWith(github.ref, 'refs/heads/pull-request/')
- uses: nv-gha-runners/get-pr-info@main
-
- - name: Checkout action
- uses: actions/checkout@v6
- with:
- repository: noamelf/codeowner-multi-approval-action
- ref: v0.1
- path: codeowner-multi-approval-action
-
- - name: Check Codeowners Approval
- uses: ./codeowner-multi-approval-action
- with:
- pr-number: ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }}
- repo-name: ${{ github.repository }}
- github-token: ${{ secrets.PAT }}
-
- multi-approval-bot-summary:
- needs: [pre-flight, codeowners-approval]
- if: |
- (
- needs.pre-flight.outputs.docs_only == 'true'
- || needs.pre-flight.outputs.is_merge_group == 'true'
- || needs.pre-flight.outputs.is_deployment_workflow == 'true'
- || always()
- )
- && github.repository == 'NVIDIA/Megatron-LM'
- && !cancelled()
- runs-on: ubuntu-latest
- steps:
- - name: Checkout repository
- uses: actions/checkout@v6
-
- - name: Result
- env:
- GH_TOKEN: ${{ github.token }}
- GITHUB_RUN_ID: ${{ github.run_id }}
- SKIPPING_IS_ALLOWED: ${{ needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' || needs.pre-flight.outputs.is_ci_workload == 'true' }}
- run: |
- FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success")] | length') || echo 0
-
- if [ "${FAILED_JOBS:-0}" -eq 0 ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]; then
- echo "β
All previous jobs completed successfully"
- exit 0
- else
- echo "β Found $FAILED_JOBS failed job(s)"
- # Show which jobs failed
- gh run view $GITHUB_RUN_ID --json jobs --jq '.jobs[] | select(.status == "completed" and .conclusion != "success") | .name'
- exit 1
- fi
diff --git a/.gitlab/stages/00.pre.yml b/.gitlab/stages/00.pre.yml
index 1d35494dcd6..e00ce8afc36 100644
--- a/.gitlab/stages/00.pre.yml
+++ b/.gitlab/stages/00.pre.yml
@@ -71,6 +71,7 @@ pre:create_ci_branches_dev:
- branch: ci-dev-rebuild-mcore-nemo-image
- branch: ci-dev-mr
- branch: ci-dev-nightly
+ - branch: ci-dev-weekly
- branch: ci-dev-upgrade-dependencies
tags:
- arch/amd64
diff --git a/.gitlab/stages/04.functional-tests.yml b/.gitlab/stages/04.functional-tests.yml
index 55c4d740659..002c96e7c0f 100644
--- a/.gitlab/stages/04.functional-tests.yml
+++ b/.gitlab/stages/04.functional-tests.yml
@@ -255,7 +255,7 @@ functional:x_notify:
- export RO_API_TOKEN=${PROJECT_ACCESS_TOKEN_MCORE}
- export GITLAB_ENDPOINT
- export CONTEXT=$FUNCTIONAL_TEST_SCOPE
- - export TAG_TEAM=$([[ "$CI_COMMIT_BRANCH" == "main" ]] && echo "1" || "0")
+    - export TAG_TEAM=$([[ "$CI_COMMIT_BRANCH" == "main" || "$CI_COMMIT_BRANCH" == "dev" ]] && echo "1" || echo "0")
- export TEAM_SLUG=$SLACK_ADMIN
- |
python tests/test_utils/python_scripts/notify.py \
@@ -269,7 +269,7 @@ functional:x_notify:
paths:
- scripts
rules:
- - if: ($CI_PIPELINE_SOURCE == "schedule" || $CI_COMMIT_BRANCH == "main") && $FUNCTIONAL_TEST == "yes"
+ - if: ($CI_PIPELINE_SOURCE == "schedule" || $CI_COMMIT_BRANCH == "main" || $CI_COMMIT_BRANCH == "dev") && $FUNCTIONAL_TEST == "yes"
when: always
- when: never
diff --git a/README.md b/README.md
index 9a62f9bb750..b22a8d0e8f6 100644
--- a/README.md
+++ b/README.md
@@ -1,21 +1,31 @@
-Megatron-LM and Megatron Core
-=============================
+Megatron-LM & Megatron Core
+===========================
GPU-optimized library for training transformer models at scale
-[](https://docs.nvidia.com/megatron-core/developer-guide/latest/index.html)
-[](./CHANGELOG.md)
+[](https://docs.nvidia.com/Megatron-Core/developer-guide/latest/index.html)
+[](./CHANGELOG.md)
[](./LICENSE)
-## About
+> ## 🚨 **DEVELOPMENT BRANCH**
+> ⚠️ **EXPERIMENTAL FEATURES** - This is the **dev branch** with experimental features.
+>
+> **→ For releases and comprehensive documentation, visit the [main branch](https://github.com/NVIDIA/Megatron-LM)**
-This repository contains two components: **Megatron-LM** and **Megatron Core**.
+## ⚡ Quickstart
-**Megatron-LM** is a reference example that includes Megatron Core plus pre-configured training scripts. Best for research teams, learning distributed training, and quick experimentation.
+```bash
+# Clone the dev branch
+git clone -b dev https://github.com/NVIDIA/Megatron-LM.git
+cd Megatron-LM
+
+# Install from source with dev dependencies (includes transformer_engine)
+pip install -e ".[mlm,dev]"
+```
**Megatron Core** is a composable library with GPU-optimized building blocks for custom training frameworks. It provides transformer building blocks, advanced parallelism strategies (TP, PP, DP, EP, CP), mixed precision support (FP16, BF16, FP8, FP4), and model architectures. Best for framework developers and ML engineers building custom training pipelines.
@@ -58,14 +68,21 @@ For NGC container setup and all installation options, see the **[Installation Gu
- **[2025/05]** Megatron Core v0.11.0 brings new capabilities for multi-data center LLM training ([blog](https://developer.nvidia.com/blog/turbocharge-llm-training-across-long-haul-data-center-networks-with-nvidia-nemo-framework/)).
-Previous News
+Table of Contents
-- **[2024/07]** Megatron Core v0.7 improves scalability and training resiliency and adds support for multimodal training ([blog](https://developer.nvidia.com/blog/train-generative-ai-models-more-efficiently-with-new-nvidia-Megatron-Core-functionalities/)).
-- **[2024/06]** Megatron Core added supports for Mamba-based models. Check out our paper [An Empirical Study of Mamba-based Language Models](https://arxiv.org/pdf/2406.07887) and [code example](https://github.com/NVIDIA/Megatron-LM/tree/ssm/examples/mamba).
-- **[2024/01 Announcement]** NVIDIA has released the core capabilities in **Megatron-LM** into [**Megatron Core**](https://github.com/NVIDIA/Megatron-LM/tree/main/megatron/core) in this repository. Megatron Core expands upon Megatron-LM's GPU-optimized techniques with more cutting-edge innovations on system-level optimizations, featuring composable and modular APIs.
+**Getting Started**
+- [⚡ Quickstart](#-quickstart)
+- [🔧 Dev Branch Philosophy](#dev-branch-philosophy)
+- [📊 Performance & Benchmarking](#performance--benchmarking)
+- [👥 Community & Support](#community--support)
+
+**For Complete Documentation** → [Main Branch](https://github.com/NVIDIA/Megatron-LM) | [Official Docs](https://docs.nvidia.com/Megatron-Core/)
+
+## Dev Branch Philosophy
+
# Project Structure
```
@@ -128,17 +145,32 @@ We also strong scaled the standard GPT-3 model (our version has slightly more th
# Roadmaps
-- **[MoE Roadmap](https://github.com/NVIDIA/Megatron-LM/issues/1729)** - DeepSeek-V3, Qwen3, advanced parallelism, FP8 optimizations, and Blackwell enhancements
+### Fast Iteration
+- **Streamlined Review**: 1 code owner + 1 dev approver (can delegate review) + CI/CD
+
+### Feature Lifecycle (Coming Soon)
+- **6-Month Timeline**: Experimental features must graduate to stable or be deprecated
+- **Migration Support**: Assistance provided for feature transitions
+
+### Stability Expectations
+- **Experimental Nature**: Features may change or be removed as development progresses
+- **Testing**: All features will pass convergence and performance validation before inclusion
+- **Support**: Dev branch issues should include `[DEV]` prefix
# Resources
-## Getting Help
+## Performance & Benchmarking
-- π **[Documentation](https://docs.nvidia.com/megatron-core/developer-guide/latest/index.html)** - Official documentation
-- π **[Issues](https://github.com/NVIDIA/Megatron-LM/issues)** - Bug reports and feature requests
+- 🚀 [2025/11] [Optimizing DeepSeek-V3 Training Performance on NVIDIA GB200 NVL72](docs/discussions/deepseek-v3-gb200-optimization/deepseek-v3-gb200-optimization.md).
+- ⚡ [2025/11] [A Guide to Reproduce DeepSeek-V3 Pre-training Performance on GB200](docs/discussions/deepseek-v3-gb200-optimization/deepseek-v3-gb200-reproduce-guide.md).
-## Contributing
+## Community & Support
+### Getting Help
+- 📖 **[Documentation](https://docs.nvidia.com/Megatron-Core/)** - Official documentation
+- 🐛 **[Issues](https://github.com/NVIDIA/Megatron-LM/issues)** - Bug reports and feature requests
+
+### Contributing
 We ❤️ contributions! Ways to contribute:
 - 🐛 **Report bugs** - Help us improve reliability
@@ -146,12 +178,9 @@ We ❤️ contributions! Ways to contribute:
 - 📝 **Improve docs** - Make Megatron Core more accessible
 - 🔧 **Submit PRs** - Contribute code improvements
-**β [Contributing Guide](https://docs.nvidia.com/megatron-core/developer-guide/latest/developer/contribute.html)**
-
-## Citation
-
-If you use Megatron in your research or project, we appreciate that you use the following citations:
+**→ [Contributing Guide](./CONTRIBUTING.md)**
+### Citation
```bibtex
@article{megatron-lm,
title={Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism},
diff --git a/docker/Dockerfile.ci.dev b/docker/Dockerfile.ci.dev
index fa214deeea5..7f3a5c0552a 100644
--- a/docker/Dockerfile.ci.dev
+++ b/docker/Dockerfile.ci.dev
@@ -16,7 +16,7 @@ ENV UV_LINK_MODE=copy
RUN bash -ex <<"EOF"
apt-get update
- apt-get install -y --no-install-recommends gettext python3-venv psmisc uuid-runtime
+ apt-get install -y --no-install-recommends gettext python3-venv psmisc uuid-runtime
apt-get clean
python -m venv /opt/jet
ARCH=$(uname -m)
@@ -31,6 +31,11 @@ RUN bash -ex <<"EOF"
curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh
EOF
+RUN ln -sf /usr/local/cuda/targets/x86_64-linux/include/cuda \
+ /usr/local/include/cuda
+RUN find /usr/local/cuda -name "utility" 2>/dev/null | head -5 && \
+ ls /usr/local/cuda/targets/x86_64-linux/include/ | head -20
+
COPY README.md pyproject.toml uv.lock /workspace/
COPY megatron/core/__init__.py /workspace/megatron/core/
COPY megatron/core/package_info.py /workspace/megatron/core/
@@ -40,7 +45,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
export NVTE_CUDA_ARCHS="80;90;100"
uv venv ${UV_PROJECT_ENVIRONMENT} --system-site-packages
uv sync --only-group build
- uv sync --extra ${IMAGE_TYPE} --extra mlm --link-mode copy --locked \
+ uv sync --extra ${IMAGE_TYPE} --extra mlm --group no_pypi_wheels --link-mode copy --locked \
--no-install-package torch \
--no-install-package torchvision \
--no-install-package triton \
diff --git a/docs/add_copyright_header.py b/docs/add_copyright_header.py
index 9694ef84819..9bc4481c506 100644
--- a/docs/add_copyright_header.py
+++ b/docs/add_copyright_header.py
@@ -1,3 +1,5 @@
 #!/usr/bin/env python3
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
 """One-off script to add NVIDIA copyright header to all .md files under docs/."""
diff --git a/docs/discussions/deepseek-v3-gb200-optimization/deepseek-v3-gb200-reproduce-guide.md b/docs/discussions/deepseek-v3-gb200-optimization/deepseek-v3-gb200-reproduce-guide.md
new file mode 100644
index 00000000000..8fa3051e479
--- /dev/null
+++ b/docs/discussions/deepseek-v3-gb200-optimization/deepseek-v3-gb200-reproduce-guide.md
@@ -0,0 +1,358 @@
+---
+orphan: true
+---
+
+# A Guide to Reproduce DeepSeek-V3 Pre-training Performance on GB200
+
+## 1. Dockerfile
+
+Requirements:
+- Transformer Engine: We recommend using commit [d2945c6](https://github.com/NVIDIA/TransformerEngine/commit/d2945c6a571e3978677614d1fe08779966a5a4ef) with PR [2146](https://github.com/NVIDIA/TransformerEngine/pull/2146) and [2150](https://github.com/NVIDIA/TransformerEngine/pull/2150). You could prepare the branch by yourself, or use this [branch](https://github.com/hxbai/TransformerEngine/commits/dev_20251024/) based on TE v2.9 plus the above three commits/PRs.
+- cuDNN: v9.14 is required.
+- HybridEP: Install it from [here](https://github.com/deepseek-ai/DeepEP/commits/3f601f7ac1c062c46502646ff04c535013bfca00).
+
+Dockerfile for reference.
+
+```dockerfile
+FROM nvcr.io/nvidia/pytorch:25.09-py3 AS base
+
+ENV SHELL=/bin/bash
+
+# =========================
+# Install system packages
+# =========================
+RUN rm -rf /opt/megatron-lm && \
+ apt-get update && \
+ apt-get install -y sudo gdb bash-builtins git zsh autojump tmux curl gettext libfabric-dev && \
+ wget https://github.com/mikefarah/yq/releases/download/v4.27.5/yq_linux_arm64 -O /usr/bin/yq && \
+ chmod +x /usr/bin/yq
+
+# =========================
+# Install Python packages
+# =========================
+# NOTE: `unset PIP_CONSTRAINT` to install packages that do not meet the default constraint in the base image.
+# Some package requirements and related versions are from
+# https://github.com/NVIDIA/Megatron-LM/blob/core_v0.12.0/Dockerfile.linting.
+# https://github.com/NVIDIA/Megatron-LM/blob/core_v0.12.0/requirements_mlm.txt.
+# https://github.com/NVIDIA/Megatron-LM/blob/core_v0.12.0/requirements_ci.txt.
+RUN unset PIP_CONSTRAINT && pip install --no-cache-dir debugpy dm-tree torch_tb_profiler einops wandb \
+ sentencepiece tokenizers transformers torchvision ftfy modelcards datasets tqdm pydantic \
+ nvidia-pytriton py-spy yapf darker \
+ tiktoken flask-restful \
+ nltk wrapt pytest pytest_asyncio pytest-cov pytest_mock pytest-random-order \
+ black==24.4.2 isort==5.13.2 flake8==7.1.0 pylint==3.2.6 coverage mypy \
+ setuptools==69.5.1
+
+# =========================
+# Install cudnn 9.14.0.64 for correct mxfp8 quantization and layernorm fusion
+# =========================
+RUN apt-get update && \
+ wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/sbsa/cuda-keyring_1.1-1_all.deb && \
+ dpkg -i cuda-keyring_1.1-1_all.deb && \
+ apt-get update && \
+ apt-get -y install libcudnn9-cuda-13
+
+# =========================
+# Install latest TE
+# Use a specific commit instead of main to make it more stable.
+# This is based on release_v2.9 branch and contains some CPU and quantization optimizations.
+# =========================
+ARG COMMIT="7dd3914726abb79bc99ff5a5db1449458ed64151"
+ARG TE="git+https://github.com/hxbai/TransformerEngine.git@${COMMIT}"
+RUN pip install nvidia-mathdx==25.1.1 && \
+ unset PIP_CONSTRAINT && \
+ NVTE_CUDA_ARCHS="100" NVTE_BUILD_THREADS_PER_JOB=8 NVTE_FRAMEWORK=pytorch pip install --no-build-isolation --no-cache-dir $TE
+
+# =========================
+# Install HybridEP
+# =========================
+WORKDIR /home/
+RUN git clone --branch hybrid-ep https://github.com/deepseek-ai/DeepEP.git && \
+ cd DeepEP && git checkout 3f601f7ac1c062c46502646ff04c535013bfca00 && \
+ TORCH_CUDA_ARCH_LIST="10.0" pip install --no-build-isolation .
+
+# =========================
+# Clean cache
+# =========================
+RUN rm -rf /root/.cache /tmp/*
+```
+
+> [!Tip]
+>
+> If you prefer to use CUDA 12.9, please change the base container to `nvcr.io/nvidia/pytorch:25.06-py3` and the cuDNN to be installed to `libcudnn9-cuda-12`.
+
+## 2. Megatron-Core
+
+We recommend using the [dev branch](https://github.com/NVIDIA/Megatron-LM/tree/dev) after PR [1917](https://github.com/NVIDIA/Megatron-LM/pull/1917).
+
+```bash
+git clone https://github.com/NVIDIA/Megatron-LM.git && \
+cd Megatron-LM &&
+git checkout effebd81f410bc6566fffee6c320b6f8f762e06d
+```
+
+## 3. Cluster Configuration
+
+Since we're using EP 32 on NVL72, it's important to make sure
+
+> [!Important]
+> **Every 32 GB200 GPUs (8 nodes) are in the same NVL domain (or rack)**.
+
+Usually you can make it via your cluster workload manager. Taking Slurm as an example, you could pass `--segment 8` to the sbatch command to ensure that every segment of 8 nodes will be scheduled to a rack.
+
+## 4. Training scripts
+
+### Environment variables
+
+```bash
+CUDA_DEVICE_MAX_CONNECTIONS=1
+NVTE_FWD_LAYERNORM_SM_MARGIN=0
+NVTE_BWD_LAYERNORM_SM_MARGIN=0
+NVLINK_DOMAIN_SIZE=72
+NVTE_ALLOW_NONDETERMINISTIC_ALGO=1
+PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+NCCL_NVLS_ENABLE=0
+NVTE_FUSED_ATTN=1
+NVTE_NORM_FWD_USE_CUDNN=1
+NVTE_NORM_BWD_USE_CUDNN=1
+PYTHONWARNINGS=ignore
+NCCL_DEBUG=VERSION
+NCCL_GRAPH_REGISTER=0
+```
+
+### bindpcie
+
+Download [bindpcie](https://github.com/NVIDIA/mlperf-common/blob/main/client/bindpcie) to your workdir, make it executable,
+
+```bash
+wget https://raw.githubusercontent.com/NVIDIA/mlperf-common/refs/heads/main/client/bindpcie &&
+chmod 755 bindpcie
+```
+
+and then
+
+> [!Important]
+> **Place it at the beginning of your launch command in every process.**
+
+Taking Slurm as an example, your script should look like
+
+```bash
+#!/bin/bash
+
+#SBATCH [... sbatch args]
+
+srun [... srun args] /path/to/bindpcie /path/to/pretrain_gpt.py [... mcore arguments]
+```
+
+This is a very important step on GB200.
+
+### Launch script
+
+```bash
+/path/to/bindpcie \
+/path/to/megatron-lm/pretrain_gpt.py \
+--distributed-timeout-minutes 60 \
+--tensor-model-parallel-size 1 \
+--pipeline-model-parallel-size 8 \
+--expert-model-parallel-size 32 \
+--context-parallel-size 1 \
+--expert-tensor-parallel-size 1 \
+--use-distributed-optimizer \
+--overlap-grad-reduce \
+--overlap-param-gather \
+--use-mcore-models \
+--sequence-parallel \
+--use-flash-attn \
+--disable-bias-linear \
+--micro-batch-size 1 \
+--global-batch-size 2048 \
+--train-samples 585937500 \
+--exit-duration-in-mins 220 \
+--no-save-optim \
+--no-check-for-nan-in-loss-and-grad \
+--cross-entropy-loss-fusion \
+--cross-entropy-fusion-impl te \
+--manual-gc \
+--manual-gc-interval 10 \
+--enable-experimental \
+--transformer-impl transformer_engine \
+--seq-length 4096 \
+--data-cache-path /path/to/data_cache \
+--tokenizer-type HuggingFaceTokenizer \
+--tokenizer-model unsloth/DeepSeek-V3 \
+--data-path /path/to/data \
+--split 99,1,0 \
+--no-mmap-bin-files \
+--no-create-attention-mask-in-dataloader \
+--num-workers 6 \
+--num-layers 61 \
+--hidden-size 7168 \
+--ffn-hidden-size 18432 \
+--num-attention-heads 128 \
+--kv-channels 128 \
+--max-position-embeddings 4096 \
+--position-embedding-type rope \
+--rotary-base 10000 \
+--make-vocab-size-divisible-by 3232 \
+--normalization RMSNorm \
+--norm-epsilon 1e-6 \
+--swiglu \
+--untie-embeddings-and-output-weights \
+--multi-latent-attention \
+--attention-dropout 0.0 \
+--hidden-dropout 0.0 \
+--clip-grad 1.0 \
+--weight-decay 0.1 \
+--qk-layernorm \
+--lr-decay-samples 584765624 \
+--lr-warmup-samples 1536000 \
+--lr-warmup-init 3.9e-7 \
+--lr 3.9e-6 \
+--min-lr 3.9e-7 \
+--lr-decay-style cosine \
+--adam-beta1 0.9 \
+--adam-beta2 0.95 \
+--num-experts 256 \
+--moe-layer-freq "([0]*3+[1]*58)" \
+--moe-ffn-hidden-size 2048 \
+--moe-shared-expert-intermediate-size 2048 \
+--moe-router-load-balancing-type seq_aux_loss \
+--moe-router-topk 8 \
+--moe-grouped-gemm \
+--moe-aux-loss-coeff 1e-4 \
+--moe-router-group-topk 4 \
+--moe-router-num-groups 8 \
+--moe-router-pre-softmax \
+--moe-router-padding-for-quantization \
+--moe-router-topk-scaling-factor 2.5 \
+--moe-router-score-function sigmoid \
+--moe-router-enable-expert-bias \
+--moe-router-bias-update-rate 1e-3 \
+--moe-router-dtype fp32 \
+--moe-permute-fusion \
+--moe-router-fusion \
+--q-lora-rank 1536 \
+--kv-lora-rank 512 \
+--qk-head-dim 128 \
+--qk-pos-emb-head-dim 64 \
+--v-head-dim 128 \
+--rotary-scaling-factor 40 \
+--mscale 1.0 \
+--mscale-all-dim 1.0 \
+--eval-iters 32 \
+--eval-interval 200 \
+--no-load-optim \
+--no-load-rng \
+--auto-detect-ckpt-format \
+--load None \
+--save /path/to/checkpoints \
+--save-interval 500 \
+--dist-ckpt-strictness log_all \
+--init-method-std 0.02 \
+--log-timers-to-tensorboard \
+--log-memory-to-tensorboard \
+--log-validation-ppl-to-tensorboard \
+--log-throughput \
+--log-interval 1 \
+--logging-level 40 \
+--tensorboard-dir /path/to/tensorboard \
+--wandb-project deepseek-v3-benchmarking-v0.15 \
+--wandb-exp-name DeepSeek-V3-TP1PP8EP32CP1VPP4-MBS1GBS2048-v0.15 \
+--bf16 \
+--enable-experimental \
+--recompute-granularity selective \
+--recompute-modules moe_act mlp \
+--cuda-graph-impl transformer_engine \
+--cuda-graph-scope attn moe_router moe_preprocess \
+--te-rng-tracker \
+--pipeline-model-parallel-layout "Et|(tt|)*30L" \
+--moe-router-force-load-balancing \
+--moe-token-dispatcher-type flex \
+--moe-flex-dispatcher-backend hybridep \
+--moe-hybridep-num-sms 32 \
+--fp8-recipe mxfp8 \
+--fp8-format e4m3 \
+--fp8-param-gather \
+--reuse-grad-buf-for-mxfp8-param-ag \
+--use-precision-aware-optimizer \
+--main-grads-dtype fp32 \
+--main-params-dtype fp32 \
+--exp-avg-dtype bf16 \
+--exp-avg-sq-dtype bf16 \
+```
+
+### Explanation of arguments
+
+The following arguments indicate key optimizations.
+
+- Pipeline parallel layout
+
+```bash
+--pipeline-model-parallel-layout "Et|(tt|)*30L"
+```
+
+`E` stands for embedding, `t` for transformer layer, `L` for Loss. So it's interpreted as a total of 32 stages, where the first stage is Embedding + 1 transformer layer, the last stage is Loss, and the middle 30 stages are 2 transformer layers.
+
+- Fine-grained recompute
+
+```bash
+--recompute-granularity selective \
+--recompute-modules moe_act mlp \
+```
+
+- Partial CUDA Graphs
+
+```bash
+--cuda-graph-impl transformer_engine \
+--cuda-graph-scope attn moe_router moe_preprocess \
+--te-rng-tracker \
+```
+
+- Force load balancing for performance benchmark
+
+```bash
+--moe-router-force-load-balancing \
+```
+
+- HybridEP
+
+```bash
+--moe-token-dispatcher-type flex \
+--moe-flex-dispatcher-backend hybridep \
+--moe-hybridep-num-sms 32 \
+```
+
+- MXFP8 recipe
+
+```bash
+--fp8-recipe mxfp8 \
+--fp8-format e4m3 \
+--fp8-param-gather \
+--reuse-grad-buf-for-mxfp8-param-ag \
+```
+
+- BF16 optimizer states
+
+```bash
+--use-precision-aware-optimizer \
+--main-grads-dtype fp32 \
+--main-params-dtype fp32 \
+--exp-avg-dtype bf16 \
+--exp-avg-sq-dtype bf16 \
+```
+
+- Kernel fusions
+
+```bash
+--cross-entropy-loss-fusion \
+--cross-entropy-fusion-impl te \
+--moe-permute-fusion \
+--moe-router-fusion \
+```
+
+- Manual GC to make ranks better synchronized
+
+```bash
+--manual-gc \
+--manual-gc-interval 10 \
+```
diff --git a/docs/discussions/deepseek-v3-gb200-optimization/images/image1.png b/docs/discussions/deepseek-v3-gb200-optimization/images/image1.png
new file mode 100644
index 00000000000..6e4dad685c4
Binary files /dev/null and b/docs/discussions/deepseek-v3-gb200-optimization/images/image1.png differ
diff --git a/docs/discussions/deepseek-v3-gb200-optimization/images/image2.png b/docs/discussions/deepseek-v3-gb200-optimization/images/image2.png
new file mode 100644
index 00000000000..920e3c57f94
Binary files /dev/null and b/docs/discussions/deepseek-v3-gb200-optimization/images/image2.png differ
diff --git a/docs/discussions/deepseek-v3-gb200-optimization/images/image3.png b/docs/discussions/deepseek-v3-gb200-optimization/images/image3.png
new file mode 100644
index 00000000000..f606dbfb744
Binary files /dev/null and b/docs/discussions/deepseek-v3-gb200-optimization/images/image3.png differ
diff --git a/docs/discussions/deepseek-v3-gb200-optimization/images/image4.png b/docs/discussions/deepseek-v3-gb200-optimization/images/image4.png
new file mode 100644
index 00000000000..04239401edd
Binary files /dev/null and b/docs/discussions/deepseek-v3-gb200-optimization/images/image4.png differ
diff --git a/docs/discussions/deepseek-v3-gb200-optimization/images/image5.png b/docs/discussions/deepseek-v3-gb200-optimization/images/image5.png
new file mode 100644
index 00000000000..0128fc7ae45
Binary files /dev/null and b/docs/discussions/deepseek-v3-gb200-optimization/images/image5.png differ
diff --git a/docs/discussions/deepseek-v3-gb200-optimization/images/image6.png b/docs/discussions/deepseek-v3-gb200-optimization/images/image6.png
new file mode 100644
index 00000000000..cb2ed2eb9ad
Binary files /dev/null and b/docs/discussions/deepseek-v3-gb200-optimization/images/image6.png differ
diff --git a/docs/discussions/deepseek-v3-gb200-optimization/images/image7.png b/docs/discussions/deepseek-v3-gb200-optimization/images/image7.png
new file mode 100644
index 00000000000..325d0fd4f52
Binary files /dev/null and b/docs/discussions/deepseek-v3-gb200-optimization/images/image7.png differ
diff --git a/docs/source/api-guide/router_replay.md b/docs/source/api-guide/router_replay.md
new file mode 100644
index 00000000000..b2e043b3065
--- /dev/null
+++ b/docs/source/api-guide/router_replay.md
@@ -0,0 +1,180 @@
+---
+orphan: true
+---
+
+# Design Document: MoE Router Replay Feature
+
+## 1. Overview
+
+This document provides a detailed description of the "Router Replay" feature implemented within the Megatron-LM Core for Mixture-of-Experts (MoE) models.
+
+This feature is designed to enhance determinism and analyzability in MoE model training and inference. It enables the model to load routing decisions from a predefined file and enforce their use during the forward pass, thereby bypassing the real-time routing computation.
+
+## 2. Motivation
+
+* **Determinism & Reproducibility**: In distributed training, MoE routing decisions can exhibit minor variations due to factors like floating-point precision. By replaying a fixed routing table, the MoE computation path is guaranteed to be identical across runs, which facilitates debugging and reproducing experimental results.
+* **Performance Profiling**: The router's own computation (e.g., logits calculation, top-k selection) incurs overhead. In replay mode, this part of the computation can be completely skipped, allowing for more precise isolation and profiling of performance bottlenecks within the Expert Layers themselves.
+* **Debugging Aid**: When issues arise in the model, fixing the routing decisions helps to isolate variables, making it easier to determine whether the problem lies with the routing mechanism or the expert computations.
+
+## 3. Design and Architecture
+
+The design follows the principles of being non-intrusive and on-demand, with the core idea of activating the replay logic only when explicitly requested by the user.
+
+* **Core Components**:
+ * `RouterReplay` (located in `megatron/core/transformer/moe/router_replay.py`): A utility class for replaying MoE routing decisions. When enabled via the `enable_routing_replay` flag, a separate instance of `RouterReplay` is created for each MoE layer's router. Each instance is responsible for loading routing data and providing the deterministic routing decisions for its corresponding layer during the forward pass.
+ * `enable_routing_replay` (located in `megatron/core/transformer/transformer_config.py`): A boolean global configuration flag that serves as the sole entry point for enabling this feature.
+
+* **Workflow**:
+ The feature supports different modes, such as recording and replaying, controlled by a `RouterReplayAction`.
+
+ 1. **Enabling the Feature**: The user sets `enable_routing_replay` to `True` in the model configuration.
+ 2. **Initialization**: When `enable_routing_replay` is true, each `TopKRouter` creates its own `RouterReplay` instance.
+ 3. **Mode Configuration**: The user must programmatically set the desired router replay action (e.g., `record`, `forward_replay`, `backward_replay`) on the `RouterReplay` instances.
+ 4. **Execution Flow (within a mini-batch)**:
+ * **Forward Pass**:
+ * For each micro-batch, the `topk_routing_with_score_function` checks the `router_replay_action`.
+ * **In `record` mode**: The dynamically computed `top-k` expert indices are captured and stored.
+ * **In `forward_replay` mode**: The function retrieves pre-loaded expert indices from `target_topk_idx`. These indices are used for the forward computation and are also appended to the `replay_backward_list` to prepare for the backward pass.
+ * **Backward Pass**:
+ * For each micro-batch (processed in reverse order in pipeline parallelism), the `router_replay_action` is checked again.
+ * **In `backward_replay` mode**: The function retrieves the expert indices for the corresponding micro-batch by popping them from the `replay_backward_list`. This mode is intended for training recomputation (e.g., activation checkpointing and pipeline recompute) so the same routing decisions are used during recompute/backward as in forward, ensuring determinism and correctness.
+
+## 4. Implementation Details
+
+The implementation cleanly separates the replay logic from the router's core computation.
+
+* **`megatron/core/transformer/transformer_config.py`**:
+ * Adds the configuration option `enable_routing_replay: bool = False`.
+
+* **`megatron/core/transformer/moe/moe_utils.py`**:
+  * Uses the `RouterReplay` class (defined in `megatron/core/transformer/moe/router_replay.py`; see Section 3) to manage the state for recording and replaying routing decisions for a single MoE layer.
+ * `target_topk_idx`: An attribute holding the expert indices for the current micro-batch during forward replay mode.
+ * `recorded_topk_idx`: An attribute for storing the computed expert indices when in record mode.
+ * `replay_backward_list`: A list that accumulates the `top-k` indices used during the forward passes of a mini-batch. This list is consumed in FIFO order during the backward pass to ensure correctness under pipeline parallelism.
+ * `set_target_indices()`: A method to load the replay indices into `target_topk_idx` for the forward pass.
+ * `record_indices()`: A method to save the computed indices.
+ * The `topk_routing_with_score_function` is modified to contain the core logic. It checks the `router_replay_action` on the `router_replay` instance and accordingly performs one of the following actions: computes and records indices, replays indices from `target_topk_idx` (for forward), replays indices from `replay_backward_list` (for backward), or falls through to the default dynamic routing.
+
+### Training recompute usage
+- During forward replay, `set_target_indices()` prepares `replay_backward_list` so each micro-batch's indices are available for recomputation.
+- During recompute/backward, set action to `REPLAY_BACKWARD` so indices are consumed in FIFO order to mirror the forward sequence.
+
+## 5. Usage Guide
+
+1. **Enable & Instantiate**
+ - Create one `RouterReplay` instance per MoE router layer when building the model.
+ - Optionally use the global helpers to set/clear actions across all layers.
+2. **Record Routing Decisions**
+ - Set action: `RouterReplay.set_global_router_replay_action(RouterReplayAction.RECORD)`.
+ - Run the model; retrieve per-layer indices via `RouterReplay.get_recorded_data()` and persist.
+3. **Forward Replay**
+ - Load indices and distribute: `RouterReplay.set_replay_data(list_of_tensors)`.
+ - Set action: `RouterReplay.set_global_router_replay_action(RouterReplayAction.REPLAY_FORWARD)`.
+   - Run the model; dynamic top-k is bypassed and target indices are used.
+4. **Backward Replay**
+ - For training recomputation (activation checkpointing or pipeline recompute), set action: `REPLAY_BACKWARD` during recomputation.
+   - Per micro-batch indices are consumed from `replay_backward_list` in FIFO order.
+5. **Cleanup**
+ - Use `RouterReplay.clear_global_indices()`, `RouterReplay.clear_global_router_replay_action()`, and `RouterReplay.clear_global_router_replay_instances()` to restore default behavior and prevent memory leaks.
+
+### Quick usage with `topk_routing_with_score_function`
+
+```python
+import torch
+from megatron.core.transformer.moe.router_replay import RouterReplay, RouterReplayAction
+from megatron.core.transformer.moe.moe_utils import topk_routing_with_score_function
+
+rr = RouterReplay()
+
+# Record
+RouterReplay.set_global_router_replay_action(RouterReplayAction.RECORD)
+logits = torch.randn(8, 16)
+probs_rec, routing_map_rec = topk_routing_with_score_function(
+ logits=logits, topk=2, use_pre_softmax=False, score_function="softmax", router_replay=rr,
+)
+recorded = rr.get_recorded_indices()
+torch.save(recorded, "/tmp/replay.pt")
+
+# Forward replay
+rr.clear_router_replay_action()
+rr.set_router_replay_action(RouterReplayAction.REPLAY_FORWARD)
+target = torch.load("/tmp/replay.pt")
+rr.set_target_indices(target)
+probs_rep, routing_map_rep = topk_routing_with_score_function(
+ logits=logits, topk=2, use_pre_softmax=False, score_function="softmax", router_replay=rr,
+)
+
+RouterReplay.clear_global_router_replay_action()
+RouterReplay.clear_global_indices()
+RouterReplay.clear_global_router_replay_instances()
+```
+
+## 6. Minimal Demo
+
+Here is a minimal code example showing how to use RouterReplay for recording and replaying:
+
+```python
+import torch
+import torch.distributed as dist
+from megatron.core.transformer.transformer_config import TransformerConfig
+from megatron.core.transformer.moe.router import TopKRouter
+from megatron.core.transformer.moe.router_replay import RouterReplay, RouterReplayAction
+
+
+# Initialize distributed training
+if not dist.is_initialized():
+ dist.init_process_group(backend="nccl")
+
+# Create a transformer config with RouterReplay enabled
+config = TransformerConfig(
+ num_experts=8,
+ expert_model_parallel_size=1,
+ num_top_k=2,
+ enable_routing_replay=True
+)
+
+# Create a TopKRouter instance
+router = TopKRouter(config)
+
+# Generate sample input (batch_size, sequence_length, hidden_size)
+logits = torch.randn(16, 32, 8).to(torch.cuda.current_device())
+
+# -----------------
+# 1. Recording Mode
+# -----------------
+print("=== Recording Mode ===")
+# Set global router replay action to RECORD
+RouterReplay.set_global_router_replay_action(RouterReplayAction.RECORD)
+
+# Perform routing
+routing_output = router.forward(logits)
+print(f"Recorded top-k indices shape: {routing_output.top_k_idx.shape}")
+
+# -----------------
+# 2. Forward Replay Mode
+# -----------------
+print("\n=== Forward Replay Mode ===")
+# Save recorded indices to a file
+torch.save(routing_output.top_k_idx, "/tmp/replay.pt")
+
+# Load indices from file and set as target for replay
+replay_indices = torch.load("/tmp/replay.pt")
+for router_instance in RouterReplay.global_router_replay_instances:
+ router_instance.target_topk_idx = replay_indices
+
+# Set global router replay action to REPLAY_FORWARD
+RouterReplay.set_global_router_replay_action(RouterReplayAction.REPLAY_FORWARD)
+
+# Perform routing again - this will use the replayed indices
+replay_routing_output = router.forward(logits)
+print(f"Replayed top-k indices shape: {replay_routing_output.top_k_idx.shape}")
+print(f"Are indices the same? {torch.equal(routing_output.top_k_idx, replay_routing_output.top_k_idx)}")
+
+
+# Clean up
+RouterReplay.clear_global_router_replay_action()
+RouterReplay.clear_global_indices()
+RouterReplay.clear_global_router_replay_instances()
+if dist.is_initialized():
+ dist.destroy_process_group()
+```
diff --git a/docs/user-guide/features/fine_grained_activation_offloading.md b/docs/user-guide/features/fine_grained_activation_offloading.md
index 494674bd4f0..bb882602f37 100644
--- a/docs/user-guide/features/fine_grained_activation_offloading.md
+++ b/docs/user-guide/features/fine_grained_activation_offloading.md
@@ -7,34 +7,161 @@
license agreement from NVIDIA CORPORATION is strictly prohibited.
-->
-# Fine-grained Activation Offloading (collaborated with rednote)
+# Fine-Grained Activation Offloading
-Memory capacity is more and more important with the rising of extreme sparse MoE models like DeepSeek-V3 and Qwen3-235B. Fine-grained recomputing reduces the memory footprint at the cost of extra recomputation, while offloading could utilize the host-device bandwidth to achieve nearly zero-overhead. Fine-grained Activation Offloading targets at offloading the activation at the granularity of specific modules, so that we can calibrate the amount of offloading activation to maximize the training throughput.
+Fine-grained activation offloading reduces GPU memory by asynchronously transferring activations to CPU at the granularity of individual submodules within a transformer layer. Unlike layer-level offloading, it allows precise control over which activations to offload, enabling a tradeoff between memory savings and PCIe bandwidth overhead.
-Currently, the supported offloading modules are `"attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act"`, which could work with fine-grained recomputation to release almost all activations of a transformer layer.
+## User Guide
-**Features**
-* Support PP=1/PP/Interleaved PP
-* Compatible with fine-grained recomputation
-* Support FP8
-* Support MTP
-* Support mixed dense & moe layer
-* Support A2A Overlap
-* Support CUDA Graph
- * (Temporary) cuda graph scope cannot contains the offloading modules
+### Basic Usage
-**Usage**
```bash
# Enable fine-grained activation offloading
--fine-grained-activation-offloading
-# Specify which modules are going to offload its input
-# Choices: "attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act".
---offload-modules expert_fc1
+# Specify which modules to offload (can combine multiple)
+# Choices: attn_norm, qkv_linear, core_attn, attn_proj, mlp_norm, expert_fc1, moe_act
+--offload-modules core_attn attn_proj expert_fc1
+```
+
+### Offloadable Modules
+
+Each module offloads its **input** activation to CPU during forward and reloads it before backward:
+
+| Module | Description | Notes |
+|---|---|---|
+| `attn_norm` | Input layernorm of attention | Skipped if using `IdentityOp` |
+| `qkv_linear` | QKV linear projection | |
+| `core_attn` | Core attention (softmax + matmul) | |
+| `attn_proj` | Output projection of attention | Must be used together with `core_attn` |
+| `mlp_norm` | Pre-MLP layernorm | Skipped if using `IdentityOp` |
+| `expert_fc1` | First FC layer in MoE experts | MoE models only |
+| `moe_act` | Activation function in MoE experts | MoE models only |
+
+### Tuning Parameters
+
+```bash
+# Minimum tensor size (in elements) to offload. Smaller tensors are skipped.
+# Default: 1048576 (1M elements)
+--min-offloaded-tensor-size 1048576
+
+# Fraction of activations to offload, range [0, 1]. Default: 1.0
+# Useful for partial offloading when PCIe bandwidth is a bottleneck.
+--activation-offload-fraction 0.8
+
+# Reduce offload amount on higher PP ranks (in bytes). Default: 0
+# Higher PP ranks have fewer microbatches in flight, so offloading less
+# reduces overhead without increasing peak memory.
+--delta-offload-bytes-across-pp-ranks 1073741824
+```
+
+### CUDA Graph Integration
+
+Fine-grained offloading is compatible with CUDA graphs. When CUDA graph is enabled, the following constraints apply:
+
+- `attn_norm` and `mlp_norm` **cannot** be offloaded (they cross CUDA graph boundaries).
+- `cuda_graph_scope` must include `attn` and `moe_router`.
+- `cuda_graph_impl` must be `transformer_engine`.
+- Requires `torch >= 2.9.0` and `transformer_engine >= 2.14.0`.
+
+```bash
+# Optional: defer D2H enqueue for offloads *outside* cuda_graph_scope (MoE experts; see below)
+--delay-offload-until-cuda-graph
+```
+
+**`--delay-offload-until-cuda-graph` (`TransformerConfig.delay_offload_until_cuda_graph`)**
+
+**Inside vs outside `cuda_graph_scope`.** Offload boundaries that lie **inside** the captured `cuda_graph_scope` (for example `qkv_linear`, `core_attn`, and `attn_proj` when `attn` is in scope) are part of CUDA graph **capture and replay**. Their offload-related work is replayed with the graph rather than re-driven from Python each step, so they do **not** incur the same per-step CPU launch overhead as a purely eager path.
+
+Boundaries that run **outside** the captured region still execute as normal eager PyTorch each forward — for the recommended MoE setup, that includes expert compute after a graphed `moe_router` (e.g. offloading `expert_fc1` / `moe_act`). For those groups, each `group_offload` would otherwise submit D2H work from the host as soon as the forward hits the commit point.
+
+**What this flag does.** It only affects offload commits that are explicitly wired with **delayed** group commit (currently the MoE expert path: `expert_fc1`, `moe_act`). Around each layer's `TransformerEngine` CUDA graph replay, the offload manager enters **replay mode**; delayed commits **enqueue** `(callback, group name, forced tensors)` instead of launching D2H immediately, then **flush_delayed_groups** runs **after** that graph replay returns and issues the queued D2H copies in forward order, without changing the offload/reload semantics.
+
+**When this actually buys time (EP A2A after replay).** The benefit assumes a **real CPU/GPU synchronization gap right after graph replay** — in the usual MoE training layout, **expert parallel (EP) all-to-all** and related dispatch follows the graphed `moe_router` region. That A2A path typically needs the host to coordinate collectives and to **sync with the GPU** (e.g. wait for graph work to finish or for communication staging), so the CPU is not fully overlapped with useful launch work during that interval. Scheduling `flush_delayed_groups` **immediately after** `cudaGraphLaunch` returns uses that window to issue D2H copies from the host: the enqueue cost is largely **hidden** in slack that EP A2A would already incur. If there were no such post-replay sync (or expert work were fully captured inside the graph with no host-visible gap), deferring commits would not provide the same "free" host time.
+
+**Behavioral notes**
+
+- Does **not** replace or "delay" attention-side offloads inside the graphed `attn` region; those are not on the delayed path in the implementation.
+- Warmup and non-replay forwards still commit delayed-eligible groups immediately (no replay-mode deferral).
+- Must be used together with **fine-grained activation offloading** and **CUDA graph** under the same rules as this section (TE `cuda_graph_impl`, scope including `attn` and `moe_router`, etc.).
+- Stream ordering between the graph compute path and `d2h_stream` still uses the existing events (`forward_record` / `backward_record`); this option only changes **when** eligible D2H work is submitted from the host.
+
+### Combining with Fine-Grained Recomputation
+
+Offloading and recomputation are complementary:
+- Use **recomputation** for lightweight modules (e.g., layernorm, activation functions) with negligible compute overhead.
+- Use **offloading** for heavy modules (e.g., core_attn, expert_fc1) where recomputation would be too costly.
+
+```bash
+--recompute-granularity selective
+--recompute-modules layernorm moe_act
+--fine-grained-activation-offloading
+--offload-modules core_attn attn_proj expert_fc1
```
-**Compatible with Fine-grained Recomputation**
-- For modules with minor perf overhead like layernorm or moe_act, use recomputing to reduce memory footprint;
-- For other modules, use offloading to reduce memory footprint;
-- Make sure the offloading/reloading could be overlapped with computing;

+
+
+### Compatibility
+
+| Feature | Supported |
+|---|---|
+| PP / Interleaved PP / PP=1 | Yes |
+| Fine-grained recomputation | Yes |
+| FP8 training | Yes |
+| MTP (Multi-Token Prediction) | Yes |
+| Mixed dense & MoE layers | Yes |
+| A2A overlap (EP) | Yes |
+| CUDA Graph (TE impl) | Yes |
+
+---
+
+## How It Works
+
+### Architecture Overview
+
+The implementation consists of three layers:
+
+1. **`PipelineOffloadManager`** (singleton): Global coordinator that manages CUDA streams, CPU tensor pools, and chunk lifecycle across pipeline stages.
+2. **`ChunkOffloadHandler`**: Per-microbatch handler that tracks tensor groups, executes D2H/H2D transfers, and decides which groups to actually offload.
+3. **`FineGrainedActivationOffloadingInterface`**: Lightweight interface used by transformer modules (attention, MoE, etc.) to mark offload boundaries.
+
+### Offload/Reload Flow
+
+```
+Forward pass (Layer N):                 Backward pass (Layer N):
++----------------------+                +------------------------+
+| group_start(input)   |-- register     |                        |
+|                      |   tensor group | group_commit_backward  |
+| module.forward()     |                |   wait H2D complete    |
+|                      |                |   pop tensors from     |
+| group_offload(out)   |-- D2H async    |   CPU -> GPU           |
+|   on d2h_stream      |   to pinned CPU|   on h2d_stream        |
++----------------------+                +------------------------+
+```
+
+1. **`group_start`**: Registers a new tensor group and hooks into `saved_tensors_hooks` to intercept `save_for_backward`.
+2. **Forward execution**: All tensors saved by autograd within the group are captured.
+3. **`group_offload`**: Triggers asynchronous D2H copy on a dedicated CUDA stream (`d2h_stream`), optionally releases GPU storage of input tensors.
+4. **Backward**: Before the group's backward, tensors are reloaded from CPU to GPU on `h2d_stream`, and the compute stream waits for the transfer to complete.
+
+### Warmup and Adaptive Offloading
+
+The first training iteration serves as a **warmup phase** where the manager records tensor groups, their sizes, and the execution order. After warmup, a `post_warmup_callback` runs to:
+
+1. **Reserve margin**: The last N groups (by deduplication count) are kept on GPU to avoid reload blocking the compute stream.
+2. **Apply PP rank delta**: Higher PP ranks offload fewer bytes (controlled by `delta_offload_bytes_across_pp_ranks`).
+3. **Apply fraction**: Only a fraction of eligible groups are actually offloaded (controlled by `activation_offload_fraction`).
+4. **Print summary table**: An ASCII table of per-rank offload bytes is printed for debugging.
+
+### CPU Tensor Pool
+
+An `OffloadTensorPool` (on CPU with pinned memory) caches allocated tensors by `(shape, dtype)`. This avoids repeated `cudaMallocHost` / `cudaFreeHost` calls and reduces D2H latency after the first iteration.
+
+### CUDA Graph Support
+
+When offloading interacts with CUDA graphs:
+
+- A dedicated `cuda_graph_stream` runs the captured computation, while `d2h_stream` overlaps D2H transfers for regions that are **inside** the graph capture.
+- During CUDA graph **warmup**, offloading is disabled (`pre_warmup_hook` / `post_warmup_hook`).
+- **`delay_offload_until_cuda_graph`** applies to offload boundaries **outside** the captured scope (MoE `expert_fc1` / `moe_act` in the typical `attn` + `moe_router` configuration): D2H enqueue is deferred until **after** that layer's graph replay returns, as described under CUDA Graph Integration. The intended win is overlapping host-side offload launches with **CPU/GPU synchronization slack before EP A2A** after replay; graphed attention offloads do not use this delayed path.
\ No newline at end of file
diff --git a/examples/rl/environments/countdown/countdown.py b/examples/rl/environments/countdown/countdown.py
index c5ad57bb72d..acfabc46681 100644
--- a/examples/rl/environments/countdown/countdown.py
+++ b/examples/rl/environments/countdown/countdown.py
@@ -1,3 +1,4 @@
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# This file is adapted from code in https://github.com/Jiayi-Pan/TinyZero
import re
diff --git a/gpt_builders.py b/gpt_builders.py
index 24b5f89d311..b273f3c12aa 100644
--- a/gpt_builders.py
+++ b/gpt_builders.py
@@ -11,6 +11,7 @@
)
from megatron.core.models.gpt.experimental_attention_variant_module_specs import (
get_transformer_block_with_experimental_attention_variant_spec,
+ get_transformer_layer_with_experimental_attention_variant_spec,
)
from megatron.core.models.gpt.heterogeneous.heterogeneous_layer_specs import (
get_gpt_heterogeneous_layer_spec,
@@ -70,23 +71,29 @@ def gpt_builder(args, pre_process, post_process, vp_stage=None, config=None, pg_
mtp_block_spec = None
if args.mtp_num_layers is not None:
assert not (config.transformer_impl == "inference_optimized")
- if (
- hasattr(transformer_layer_spec, 'layer_specs')
- and len(transformer_layer_spec.layer_specs) == 0
- ):
- # Get the decoder layer spec explicitly if no decoder layer in the last stage,
- # Only happens with block spec (TransformerBlockSubmodules) when using MoE.
- transformer_layer_spec_for_mtp = _get_transformer_layer_spec(use_te, config)
+ # Get GPT decoder layer specs for the model.
+ if args.spec is not None:
+ mtp_transformer_layer_spec = import_module(args.spec)
else:
# Define the decoder block spec
- decoder_layer_specs = get_gpt_decoder_layer_specs(
- config, use_transformer_engine=use_te, normalization=args.normalization, qk_l2_norm=args.qk_l2_norm, vp_stage=vp_stage
- )
- transformer_layer_spec_for_mtp = decoder_layer_specs[-1]
+ if args.experimental_attention_variant is not None:
+ decoder_layer_specs = (
+ get_transformer_layer_with_experimental_attention_variant_spec(
+ config=config
+ )
+ )
+ else:
+ decoder_layer_specs = get_gpt_decoder_layer_specs(
+ config,
+ use_transformer_engine=use_te,
+ normalization=args.normalization,
+ qk_l2_norm=args.qk_l2_norm,
+ )
+ mtp_transformer_layer_spec = decoder_layer_specs[-1]
# Use spec of the last layer in decoder block as spec of the transformer layer in MTP
mtp_block_spec = get_gpt_mtp_block_spec(
config,
- transformer_layer_spec_for_mtp,
+ mtp_transformer_layer_spec,
use_transformer_engine=use_te,
vp_stage=vp_stage,
)
@@ -135,6 +142,8 @@ def _get_transformer_layer_spec(use_te, config):
use_te_activation_func=config.use_te_activation_func,
use_kitchen_attention=config.use_kitchen_attention,
kitchen_attention_backend=config.kitchen_attention_backend,
+ fallback_to_eager_attn=config.fallback_to_eager_attn,
+ enable_hyper_connection=config.enable_hyper_connections,
mla_down_proj_fusion=getattr(config, "mla_down_proj_fusion", False),
)
elif config.transformer_impl == "inference_optimized":
@@ -154,4 +163,5 @@ def _get_transformer_layer_spec(use_te, config):
use_kitchen=config.use_kitchen,
use_kitchen_attention=config.use_kitchen_attention,
kitchen_attention_backend=config.kitchen_attention_backend,
+ enable_hyper_connection=config.enable_hyper_connections,
)
diff --git a/hello_world b/hello_world
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/megatron/core/datasets/blended_megatron_dataset_builder.py b/megatron/core/datasets/blended_megatron_dataset_builder.py
index f728fe10d03..6cb75ab5104 100644
--- a/megatron/core/datasets/blended_megatron_dataset_builder.py
+++ b/megatron/core/datasets/blended_megatron_dataset_builder.py
@@ -48,11 +48,13 @@ def __init__(
sizes: List[int],
is_built_on_rank: Callable,
config: BlendedMegatronDatasetConfig,
+ vp_stage: Optional[int] = None,
):
self.cls = cls
self.sizes = sizes
self.is_built_on_rank = is_built_on_rank
self.config = config
+ self.vp_stage = vp_stage
log_single_rank(
logger,
diff --git a/megatron/core/datasets/data_schedule.py b/megatron/core/datasets/data_schedule.py
index 0f016473b6a..45b78e625c6 100644
--- a/megatron/core/datasets/data_schedule.py
+++ b/megatron/core/datasets/data_schedule.py
@@ -1,15 +1,26 @@
# Copyright (c) 2025 NVIDIA CORPORATION. All rights reserved.
-from typing import Any, List, Optional
+from typing import Any, Dict, List, Optional, Type
import torch
from megatron.core import parallel_state
-from megatron.core.pipeline_parallel.hybrid_cp_schedule import BalancedCPScheduler
+from megatron.core.datasets.data_schedule_utils import (
+ broadcast_scalars,
+ broadcast_tensor,
+ broadcast_to_pp_group,
+ build_packed_microbatches,
+ create_data_iterator,
+ get_batch_and_global_seqlens,
+ get_cp_slice_for_thd,
+ reroute_samples_to_dcp_ranks,
+)
+from megatron.core.packed_seq_params import PackedSeqParams
+from megatron.core.pipeline_parallel.dynamic_cp_schedule import BalancedCPScheduler
from megatron.core.process_groups_config import ProcessGroupCollection
-class HybridCPDataLoaderWrapper:
+class DynamicCPDataLoaderWrapper:
"""
A wrapper class that wraps around an existing data_iterator.
For every __next__ call,
@@ -40,7 +51,7 @@ def __init__(
self.tp_group = pg_collection.tp
assert (
self.dp_cp_group is not None and self.dp_group is not None and self.tp_group is not None
- ), "dp_cp_group, dp_group, tp_group must not be None when using hybrid context parallel"
+ ), "dp_cp_group, dp_group, tp_group must not be None when using dynamic context parallel"
self.cp_balancing_scheduler = BalancedCPScheduler(
max_seq_len_per_rank=self.config.max_seqlen_per_dp_cp_rank, dp_cp_group=self.dp_cp_group
@@ -299,3 +310,547 @@ def __next__(self) -> Any:
batch, global_ids_this_rank, global_id_seqlens, sample_id_groups, offsets
)
return samples_this_rank_with_id, sample_id_groups
+
+
+class BasePackingScheduler:
+ """Base class for sequence packing schedulers."""
+
+ def __init__(
+ self,
+ max_seqlen_per_dp_cp_rank: int,
+ cp_size: int,
+ dp_size: int,
+ microbatch_group_size_per_vp_stage: Optional[int],
+ ):
+ """
+ Args:
+ max_seqlen_per_dp_cp_rank: The maximum sequence length per DPxCP rank.
+ cp_size: The context parallel size.
+ dp_size: The data parallel size.
+ microbatch_group_size_per_vp_stage: The microbatch group size per virtual
+ pipeline stage, only used when enabling VPP, otherwise None.
+ """
+ self.max_seqlen_per_dp_cp_rank = max_seqlen_per_dp_cp_rank
+ self.cp_size = cp_size
+ self.dp_size = dp_size
+ self.microbatch_group_size_per_vp_stage = microbatch_group_size_per_vp_stage
+
+ def get_required_sample_keys(self):
+        """Return the required keys of each batch."""
+ raise NotImplementedError
+
+ def get_groups_and_subsamples(self, sample_id_seqlens):
+        """Schedule the samples into groups."""
+ raise NotImplementedError
+
+ def run(
+ self,
+ data_iterator,
+ num_microbatches,
+ dp_group,
+ tp_group,
+ pp_group,
+ dp_cp_group,
+ dev,
+ config,
+ ):
+ """
+ Run the scheduler and return the new data_iterator.
+
+ Args:
+ data_iterator: The data iterator.
+ num_microbatches: The number of microbatches to fetch.
+ dp_group: Data parallel process group.
+ tp_group: Tensor parallel process group.
+ pp_group: Pipeline parallel process group.
+ dp_cp_group: Data parallel + context parallel process group.
+ dev: CUDA device.
+ config: Model parallel config.
+
+ Returns:
+ new_data_iterator: The new data iterator (or list for VPP).
+ num_micro_batches: Number of micro batches after scheduling.
+ seqlen_sum_this_global_batch: Total tokens for FLOPs calculation.
+ seqlen_squared_sum_this_global_batch: Sum of squared seqlens for FLOPs.
+ """
+ raise NotImplementedError
+
+
+class DpBalancedScheduler(BasePackingScheduler):
+ """Packs sequences in their original order until reaching the max limit of sequence length."""
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.max_seq_len_all_ranks = self.max_seqlen_per_dp_cp_rank * self.cp_size
+
+ def get_required_sample_keys(self):
+        """Return the required keys of each batch."""
+ return [
+ "tokens",
+ "labels",
+ "loss_mask",
+ "position_ids",
+            "original_seq_len",  # Original (pre-padding) sequence length; should be a GPU tensor.
+            "padded_seq_len",  # Post-padding sequence length; should be a GPU tensor.
+ ]
+
+ def get_groups_and_subsamples(self, sample_id_seqlens):
+ """
+ Packs sequences in their original order until reaching the max limit of sequence length.
+ """
+ sample_id_groups = []
+ packed_id_groups = []
+ sum_seqlen = 0
+ single_microbatch = []
+
+ for i in range(len(sample_id_seqlens)):
+ if sum_seqlen + sample_id_seqlens[i][1] <= self.max_seq_len_all_ranks:
+ single_microbatch.append(i)
+ sum_seqlen += sample_id_seqlens[i][1]
+ else:
+ packed_id_groups.append(single_microbatch)
+ single_microbatch = [i]
+ sum_seqlen = sample_id_seqlens[i][1]
+ if len(single_microbatch) > 0:
+ packed_id_groups.append(single_microbatch)
+
+ # we want the number of packed sequences to be multiple of dp_size
+ # so we move few samples from previous microbatch
+ # to the end of the microbatches if needed
+ num_packed_sequence = len(packed_id_groups)
+
+ # when enabling vpp, we want the number of packed sequences to be
+ # multiple of dp_size * microbatch_group_size_per_vp_stage
+ multiple = self.dp_size * (
+ self.microbatch_group_size_per_vp_stage
+ if self.microbatch_group_size_per_vp_stage is not None
+ else 1
+ )
+ if num_packed_sequence % multiple != 0:
+ remainder = num_packed_sequence % multiple
+ num_to_move = multiple - remainder
+ i = num_packed_sequence - 1
+ while num_to_move > 0:
+ assert i >= 0, "Not enough samples to move"
+ if len(packed_id_groups[i]) > 1:
+ seq_id = packed_id_groups[i].pop()
+ packed_id_groups.append([seq_id])
+ num_to_move -= 1
+ else:
+ i -= 1
+
+ num_micro_batches = int(len(packed_id_groups) / self.dp_size)
+ for i in range(num_micro_batches):
+ sample_id_groups.append([])
+ for j in range(self.cp_size * self.dp_size):
+ seq_id = int(i * self.dp_size + j / self.cp_size)
+ sample_id_groups[i].append(packed_id_groups[seq_id])
+ return sample_id_groups
+
+ def run(
+ self,
+ data_iterator,
+ num_microbatches: int,
+ dp_group,
+ tp_group,
+ pp_group,
+ dp_cp_group,
+ dev: torch.device,
+ config,
+ ):
+ """
+ Run the complete scheduling pipeline.
+
+ Steps:
+ 1. Fetch batches and gather global sequence lengths
+ 2. Check required sample keys
+ 3. Schedule samples into groups
+ 4. Reroute samples to DCP ranks
+ 5. Build packed microbatches
+ 6. Calculate FLOPs info
+ 7. Broadcast to PP group (for middle PP stages)
+ 8. Broadcast to TP group (for non-TP-0 ranks)
+ 9. Handle VPP if enabled
+
+ Args:
+ data_iterator: The data iterator.
+ num_microbatches: The number of microbatches to fetch.
+ dp_group: Data parallel process group.
+ tp_group: Tensor parallel process group.
+ pp_group: Pipeline parallel process group.
+ dp_cp_group: Data parallel + context parallel process group.
+ dev: CUDA device.
+ config: Model parallel config.
+
+ Returns:
+ new_data_iterator: The new data iterator (or list for VPP).
+ num_micro_batches: Number of micro batches after scheduling.
+ seqlen_sum_this_global_batch: Total tokens for FLOPs calculation.
+ seqlen_squared_sum_this_global_batch: Sum of squared seqlens for FLOPs.
+ """
+
+ total_dcp_gpus = dp_cp_group.size()
+
+ # Handle VPP: extract the correct data_iterator for this PP stage.
+ # When VPP is enabled, data_iterator is a list with one entry per VPP stage.
+ # We only need one data_iterator to run the schedule (all VPP stages on the
+ # same PP rank share the same underlying dataset), so pick the first non-None.
+ # Record which VPP stages had data so create_data_iterator knows which ones
+ # need full samples vs metadata only.
+ vpp_has_data = None
+ if (
+ config.virtual_pipeline_model_parallel_size is not None
+ and config.virtual_pipeline_model_parallel_size > 1
+ ):
+ assert len(data_iterator) == config.virtual_pipeline_model_parallel_size
+ vpp_has_data = [di is not None for di in data_iterator]
+ extracted = None
+ for di in data_iterator:
+ if di is not None:
+ extracted = di
+ break
+ data_iterator = extracted
+
+ # data_iterator is not None on TP rank 0 for PP stages that need data
+ # (first stage, last stage, or any stage with MTP).
+ if data_iterator is not None:
+ assert tp_group.rank() == 0, "Only TP rank 0 should have data_iterator"
+
+ # Step 1: Fetch batches and gather global sequence lengths
+ batch, global_id_seqlens, global_ids_this_rank, offsets, seqlens_gathered = (
+ get_batch_and_global_seqlens(data_iterator, num_microbatches, dp_group)
+ )
+
+ # Step 2: Check required sample keys
+ for key in self.get_required_sample_keys():
+ assert (
+ key in batch[0]
+ ), f"Batch missing required key {key}, provided keys: {batch[0].keys()}"
+
+ # Step 3: Schedule samples into groups
+ sample_id_groups = self.get_groups_and_subsamples(global_id_seqlens)
+
+ # Validate scheduling result
+ set_gbs = set()
+ for group in sample_id_groups:
+ for sub in group:
+ set_gbs.update(sub)
+ assert len(set_gbs) == len(global_id_seqlens), (
+ f"set_gbs length: {len(set_gbs)} != "
+ f"global_id_seqlens length: {len(global_id_seqlens)}"
+ )
+
+ # Step 4: Reroute samples to DCP ranks
+ samples_this_rank_with_id = reroute_samples_to_dcp_ranks(
+ batch,
+ global_ids_this_rank,
+ global_id_seqlens,
+ sample_id_groups,
+ offsets,
+ dp_group,
+ tp_group,
+ dp_cp_group,
+ total_dcp_gpus,
+ )
+
+ dcp_rank = dp_cp_group.rank()
+ num_micro_batches = len(sample_id_groups)
+
+ grouped_samples = [
+ [
+ samples_this_rank_with_id[sub_sample_id]
+ for sub_sample_id in sample_id_groups[i][dcp_rank]
+ ]
+ for i in range(num_micro_batches)
+ ]
+
+ # Step 5: Build packed microbatches
+ new_samples = build_packed_microbatches(grouped_samples, dev)
+
+ # Step 6: Calculate FLOPs info
+ seqlen_sum_this_global_batch = float(sum(seqlens_gathered))
+ seqlen_squared_sum_this_global_batch = float(
+ sum(seqlen**2 for seqlen in seqlens_gathered)
+ )
+ else:
+ (
+ new_samples,
+ num_micro_batches,
+ seqlen_sum_this_global_batch,
+ seqlen_squared_sum_this_global_batch,
+ ) = (None, None, None, None)
+
+ # Step 7: Broadcast to PP group (for middle PP stages)
+ if tp_group.rank() == 0:
+ (
+ new_samples,
+ num_micro_batches,
+ seqlen_sum_this_global_batch,
+ seqlen_squared_sum_this_global_batch,
+ ) = broadcast_to_pp_group(
+ new_samples,
+ num_micro_batches,
+ seqlen_sum_this_global_batch,
+ seqlen_squared_sum_this_global_batch,
+ pp_group,
+ dev,
+ )
+
+ # Step 8: Broadcast to TP group (for non-TP-0 ranks)
+ (num_micro_batches, seqlen_sum_this_global_batch, seqlen_squared_sum_this_global_batch) = (
+ broadcast_scalars(
+ [
+ num_micro_batches,
+ seqlen_sum_this_global_batch,
+ seqlen_squared_sum_this_global_batch,
+ ],
+ tp_group,
+ dev,
+ )
+ )
+ num_micro_batches = int(num_micro_batches)
+
+ # Step 9: create data_iterator and handle VPP if enabled
+ new_data_iterator = create_data_iterator(new_samples, tp_group, config, vpp_has_data)
+
+ return (
+ new_data_iterator,
+ num_micro_batches,
+ seqlen_sum_this_global_batch,
+ seqlen_squared_sum_this_global_batch,
+ )
+
+
+scheduler_map: Dict[str, Type[BasePackingScheduler]] = {"dp_balanced": DpBalancedScheduler}
+
+
+def wrap_data_iterator(
+ data_iterator, config, num_microbatches, pg_collection: Optional[ProcessGroupCollection] = None
+):
+ """
+ A wrapper function that wraps around an existing data_iterator
+ and return the num_micro_batches for sequence packing.
+
+ Args:
+ data_iterator: The original data_iterator to wrap around
+ config: The config object containing the max_seqlen_per_dp_cp_rank
+        num_microbatches: The number of microbatches to fetch from the iterator
+ pg_collection: The process group collection.
+ """
+
+ if pg_collection is None:
+ dp_cp_group = parallel_state.get_data_parallel_group(with_context_parallel=True)
+ dp_group = parallel_state.get_data_parallel_group()
+ tp_group = parallel_state.get_tensor_model_parallel_group()
+ pp_group = parallel_state.get_pipeline_model_parallel_group()
+ else:
+ dp_cp_group = pg_collection.dp_cp
+ dp_group = pg_collection.dp
+ tp_group = pg_collection.tp
+ pp_group = pg_collection.pp
+ assert (
+ dp_cp_group is not None
+ and dp_group is not None
+ and tp_group is not None
+ and pp_group is not None
+ ), "dp_cp_group, dp_group, tp_group must not be None when using sequence packing"
+
+ dev = torch.cuda.current_device()
+ dp_size = dp_group.size()
+ cp_size = dp_cp_group.size() // dp_size
+
+ # Look up the scheduler class by name
+ scheduler_type = config.sequence_packing_scheduler
+
+ scheduler = scheduler_map[scheduler_type](
+ config.max_seqlen_per_dp_cp_rank,
+ cp_size,
+ dp_size,
+ # When VPP is enabled, align num_micro_batches to this multiple.
+ (
+ None
+ if config.virtual_pipeline_model_parallel_size is None
+ else config.microbatch_group_size_per_vp_stage
+ ),
+ )
+
+ (
+ new_data_iterator,
+ num_micro_batches,
+ seqlen_sum_this_global_batch,
+ seqlen_squared_sum_this_global_batch,
+ ) = scheduler.run(
+ data_iterator, num_microbatches, dp_group, tp_group, pp_group, dp_cp_group, dev, config
+ )
+
+ return (
+ new_data_iterator,
+ num_micro_batches,
+ seqlen_sum_this_global_batch,
+ seqlen_squared_sum_this_global_batch,
+ )
+
+
+def get_batch_on_this_rank_for_sequence_packing(
+ data_iterator,
+ vpp_size: Optional[int] = None,
+ mtp_on_this_rank: bool = False,
+ vp_stage: Optional[int] = None,
+ pg_collection: Optional[ProcessGroupCollection] = None,
+):
+ """
+ Get a batch of data for sequence packing.
+ Args:
+ data_iterator (Iterator): The data iterator to get the batch from.
+ mtp_on_this_rank (bool): Whether to use multi-token prediction.
+        vp_stage (Optional[int]): The virtual pipeline stage of this model chunk.
+ Returns:
+ tuple of (tokens, labels, loss_mask, attention_mask, position_ids, packed_seq_params)
+ """
+
+ if pg_collection is None:
+ tp_group = parallel_state.get_tensor_model_parallel_group()
+ pp_group = parallel_state.get_pipeline_model_parallel_group()
+ cp_group = parallel_state.get_context_parallel_group()
+ else:
+ tp_group = pg_collection.tp
+ pp_group = pg_collection.pp
+ cp_group = pg_collection.cp
+
+ tp_src_rank = torch.distributed.get_process_group_ranks(tp_group)[0]
+
+ is_tp_rank_0 = tp_group.rank() == 0
+ is_first_stage = pp_group.rank() == 0 and (vp_stage is None or vp_stage == 0)
+ is_last_stage = pp_group.rank() == pp_group.size() - 1 and (
+ vp_stage is None or vp_stage == vpp_size - 1
+ )
+
+ is_first_or_last_stage = is_first_stage or is_last_stage
+ dev = torch.cuda.current_device()
+
+ # data_iterator should return a batch including the following keys.
+ batch_keys = ['cu_seqlens', 'cu_seqlens_padded', 'max_seqlen']
+ if is_first_stage or mtp_on_this_rank:
+ batch_keys.append('tokens')
+ batch_keys.append('position_ids')
+ if is_last_stage or mtp_on_this_rank:
+ batch_keys.append('labels')
+ batch_keys.append('loss_mask')
+
+    # Get a batch from data_iterator or create an empty batch.
+ if is_tp_rank_0:
+ assert data_iterator is not None
+ batch = next(data_iterator)
+ for key in batch_keys:
+ assert key in batch, f"{key} is missing in current batch."
+ else:
+ assert data_iterator is None, "Non TP 0 rank should not have data_iterator"
+ batch = {}
+
+ # Partition tokens, position_ids, labels, loss_mask for context parallel.
+ # Only TP rank 0 on stages that have data (first/last PP stage or MTP stage) needs this.
+ if is_tp_rank_0 and (is_first_or_last_stage or mtp_on_this_rank):
+ get_cp_slice_for_thd(batch, cp_group)
+
+ # Broadcast cu_seqlens_size because we need it to create placeholder for cu_seqlens and
+ # cu_seqlens_padded for non TP 0 ranks.
+ if is_tp_rank_0:
+ cu_seqlen_size = torch.tensor(batch['cu_seqlens'].size(0), dtype=torch.int32, device=dev)
+ else:
+ cu_seqlen_size = torch.empty(1, dtype=torch.int32, device=dev)
+ broadcast_tensor(cu_seqlen_size, tp_src_rank, tp_group)
+ cu_seqlen_size = cu_seqlen_size.item()
+
+ # Broadcast total_tokens because we need it to create placeholder for tokens, position_ids,
+ # labels, loss_mask for non TP 0 ranks. Only first stage, last stage,
+ # and stage with mtp need this.
+
+ if is_first_or_last_stage or mtp_on_this_rank:
+ if is_tp_rank_0:
+ total_tokens = torch.tensor(batch['tokens'].size(0), dtype=torch.int32, device=dev)
+ else:
+ total_tokens = torch.empty(1, dtype=torch.int32, device=dev)
+ broadcast_tensor(total_tokens, tp_src_rank, tp_group)
+ total_tokens = total_tokens.item()
+
+ # Step1: Prepare "tokens", "position_ids" for first stage and stage with mtp on all TP ranks.
+ if is_first_stage or mtp_on_this_rank:
+ if is_tp_rank_0:
+ assert batch['tokens'].dtype == torch.int64
+ assert batch['position_ids'].dtype == torch.int64
+ batch['tokens'] = batch['tokens'].view(1, total_tokens)
+ batch['position_ids'] = batch['position_ids'].view(1, total_tokens)
+ else:
+ batch['tokens'] = torch.empty([1, total_tokens], dtype=torch.int64, device=dev)
+ batch['position_ids'] = torch.empty([1, total_tokens], dtype=torch.int64, device=dev)
+ else:
+ # Non first stage rank doesn't need tokens and position_ids.
+ batch['tokens'] = None
+ batch['position_ids'] = None
+
+ # Step2: Prepare "labels", "loss_mask" for last stage and stage with mtp on all TP ranks.
+ if is_last_stage or mtp_on_this_rank:
+ if is_tp_rank_0:
+ assert batch['labels'].dtype == torch.int64
+ assert batch['loss_mask'].dtype == torch.float32
+ batch['labels'] = batch['labels'].view(1, total_tokens)
+ batch['loss_mask'] = batch['loss_mask'].view(1, total_tokens)
+ else:
+ batch['labels'] = torch.empty([1, total_tokens], dtype=torch.int64, device=dev)
+ batch['loss_mask'] = torch.empty([1, total_tokens], dtype=torch.float32, device=dev)
+ else:
+ # Non last stage rank doesn't need labels and loss_mask.
+ batch['labels'] = None
+ batch['loss_mask'] = None
+
+ # Step3: Prepare "cu_seqlens", "cu_seqlens_padded", "max_seqlen" on all ranks.
+ if is_tp_rank_0:
+ assert batch['cu_seqlens'].dtype == torch.int32
+ assert batch['cu_seqlens_padded'].dtype == torch.int32
+ assert batch['cu_seqlens'].dim() == 1
+ assert batch['cu_seqlens_padded'].dim() == 1
+ if type(batch['max_seqlen']) == int:
+ batch['max_seqlen'] = torch.tensor(batch['max_seqlen'], dtype=torch.int32, device=dev)
+ else:
+ assert batch['max_seqlen'].dtype == torch.int32
+ assert batch['max_seqlen'].numel() == 1
+ else:
+ batch['cu_seqlens'] = torch.empty([cu_seqlen_size], dtype=torch.int32, device=dev)
+ batch['cu_seqlens_padded'] = torch.empty([cu_seqlen_size], dtype=torch.int32, device=dev)
+ batch['max_seqlen'] = torch.empty(1, dtype=torch.int32, device=dev)
+
+ # Broadcast batch inside TP group.
+ broadcast_tensor(batch['tokens'], tp_src_rank, tp_group)
+ broadcast_tensor(batch['position_ids'], tp_src_rank, tp_group)
+ broadcast_tensor(batch['labels'], tp_src_rank, tp_group)
+ broadcast_tensor(batch['loss_mask'], tp_src_rank, tp_group)
+ broadcast_tensor(batch['cu_seqlens'], tp_src_rank, tp_group)
+ broadcast_tensor(batch['cu_seqlens_padded'], tp_src_rank, tp_group)
+ broadcast_tensor(batch['max_seqlen'], tp_src_rank, tp_group)
+
+ # Extract the data from batch after broadcasting.
+ tokens = batch['tokens']
+ position_ids = batch['position_ids']
+ labels = batch['labels']
+ loss_mask = batch['loss_mask']
+ cu_seqlens = batch['cu_seqlens']
+ cu_seqlens_padded = batch['cu_seqlens_padded']
+ max_seqlen = batch['max_seqlen'].item()
+
+ # Transformer Engine has a bug of cu_seqlens, we must treat cu_seqlens_padded as cu_seqlens to
+ # get the correct result.
+ # TODO: Revert this workaround once TE fixes the issue.
+ packed_seq_params = PackedSeqParams(
+ qkv_format="thd",
+ cu_seqlens_q=cu_seqlens_padded,
+ cu_seqlens_kv=cu_seqlens_padded,
+ cu_seqlens_q_padded=cu_seqlens_padded,
+ cu_seqlens_kv_padded=cu_seqlens_padded,
+ max_seqlen_q=max_seqlen,
+ max_seqlen_kv=max_seqlen,
+ local_cp_size=None,
+ cp_group=None,
+ )
+
+ # "attention_mask" is not valid for sequence packing, so set it to None.
+ return tokens, labels, loss_mask, None, position_ids, packed_seq_params
diff --git a/megatron/core/datasets/data_schedule_utils.py b/megatron/core/datasets/data_schedule_utils.py
new file mode 100644
index 00000000000..f3c637e4c79
--- /dev/null
+++ b/megatron/core/datasets/data_schedule_utils.py
@@ -0,0 +1,529 @@
+# Copyright (c) 2025 NVIDIA CORPORATION. All rights reserved.
+
+from typing import Dict, List
+
+import numpy as np
+import torch
+
+from megatron.core.extensions.transformer_engine import get_thd_partitioned_indices
+from megatron.core.rerun_state_machine import RerunDataIterator
+
+
+def get_cp_slice_for_thd(batch, cp_group):
+ """Partition sequence data for context parallelism in THD format.
+
+ Uses TE's THD partitioned indices to split the packed sequence across CP ranks.
+ Only keys present in the batch are sliced.
+
+ Args:
+ batch: Dict with packed sequence data.
+ cp_group: Context parallel process group.
+ """
+ cp_size = cp_group.size()
+ if cp_size <= 1:
+ return
+ cp_rank = cp_group.rank()
+ total_tokens = batch['tokens'].size(0)
+ # Transformer Engine has a bug of cu_seqlens, we must treat cu_seqlens_padded as
+ # cu_seqlens to get the correct result.
+ # TODO: Revert this workaround once TE fixes the issue.
+ cu_seqlens = batch["cu_seqlens_padded"]
+ index = get_thd_partitioned_indices(cu_seqlens, total_tokens, cp_size, cp_rank)
+ for key in ['tokens', 'position_ids', 'labels', 'loss_mask']:
+ if key in batch:
+ batch[key] = batch[key].index_select(0, index)
+
+
+def _unpack_batch(batch: List[Dict[str, torch.Tensor]]) -> List[Dict[str, torch.Tensor]]:
+ """
+ Unpacks the packed samples into a list of sub-samples.
+ Since each sub-sample may be routed to different DPxCP ranks,
+ we unpack the sample here to avoid unnecessarily transferring
+ the entire packed sample.
+ """
+ batch_unpacked = []
+ dev = batch[0]["tokens"].device
+ original_seq_lens = []
+ padded_seq_lens = []
+ for sample in batch:
+ for key in sample.keys():
+ if len(sample[key].shape) == 2:
+ # squeeze the redundant batch dimension added by
+ # default collate_fn in pytorch dataloader
+ # we need a custom collate_fn for THD to avoid this
+ # current THD does not support micro_batch_size > 1 due to sft_dataset.py and
+ # data_loader in data_samples.py
+ sample[key] = sample[key].squeeze(0)
+ for sub_sample in range(sample["cu_seqlens"].shape[0] - 1):
+ sub_sample_dict = {}
+ start_idx = sample["cu_seqlens"][sub_sample]
+ end_idx = sample["cu_seqlens"][sub_sample + 1]
+ if end_idx - start_idx == 0:
+ continue
+ for key in ["tokens", "labels", "loss_mask", "position_ids"]:
+ sub_sample_dict[key] = sample[key][start_idx:end_idx]
+ # Since sft_dataset.py does not provide cu_seqlens_original,
+ # we assume original_seq_len equals padded_seq_len here.
+ # Ideally the dataset should define the pre-padding seq_len.
+ seq_len = (end_idx - start_idx).item()
+ original_seq_lens.append(seq_len)
+ padded_seq_lens.append(seq_len)
+ batch_unpacked.append(sub_sample_dict)
+
+ # Single H2D transfer for all seq lens
+ original_seq_lens_cuda = torch.tensor(original_seq_lens, device=dev)
+ padded_seq_lens_cuda = torch.tensor(padded_seq_lens, device=dev)
+ for i, sub_sample_dict in enumerate(batch_unpacked):
+ sub_sample_dict["original_seq_len"] = original_seq_lens_cuda[i : i + 1]
+ sub_sample_dict["padded_seq_len"] = padded_seq_lens_cuda[i : i + 1]
+
+ return batch_unpacked
+
+
+def _get_global_seqlens_and_ids(subsample_seqlens: torch.Tensor, dp_group):
+ """
+ Gathers the sequence lengths of all subsamples from all DP ranks and calculates global IDs.
+ """
+ # Collect the number of subsamples from all ranks
+ num_local_subsamples = subsample_seqlens.shape[0]
+ local_len = torch.tensor([num_local_subsamples], dtype=torch.int32).cuda()
+ dp_subsample_count = [torch.zeros_like(local_len) for _ in range(dp_group.size())]
+ torch.distributed.all_gather(dp_subsample_count, local_len, group=dp_group)
+
+ # Find the max number of subsamples across all ranks and pad subsample_seqlens to max length
+ dp_subsample_counts = torch.stack(dp_subsample_count, dim=0).cpu().view(-1)
+ max_sub_samples = int(dp_subsample_counts.max().item())
+
+ if num_local_subsamples < max_sub_samples:
+ subsample_seqlens_padded = torch.cat(
+ [
+ subsample_seqlens,
+ torch.zeros(max_sub_samples - num_local_subsamples, dtype=torch.int32).cuda(),
+ ],
+ dim=0,
+ )
+ else:
+ subsample_seqlens_padded = subsample_seqlens
+
+ # Gather the subsample_seqlens from all ranks
+ seqlens_gathered = [torch.empty_like(subsample_seqlens_padded) for _ in range(dp_group.size())]
+ torch.distributed.all_gather(seqlens_gathered, subsample_seqlens_padded, group=dp_group)
+
+ # Trim each seqlens_gathered to the length of the correct sample
+ for dp_rank, seqlen in enumerate(seqlens_gathered):
+ seqlens_gathered[dp_rank] = seqlen[: dp_subsample_counts[dp_rank]]
+
+ seqlens_gathered = torch.cat(seqlens_gathered, dim=0)
+ seqlens_gathered = seqlens_gathered.cpu().tolist()
+
+ # Calculate the offsets to assign unique global ID to each subsample.
+ csum = torch.cumsum(dp_subsample_counts, dim=0, dtype=torch.int32)
+ offsets = torch.cat([torch.zeros(1, dtype=torch.int32), csum], dim=0)
+
+ # Calculate global ID for each subsample
+ dp_rank = dp_group.rank()
+ global_ids = torch.arange(len(seqlens_gathered), dtype=torch.int32).cuda()
+
+ # Create a list of (global_id, seqlen) tuples for scheduling
+ global_id_seqlens = [(i, seqlens_gathered[i]) for i in range(len(global_ids))]
+
+ # Get the global IDs locally present on this rank
+ start_idx = offsets[dp_rank]
+ end_idx = offsets[dp_rank + 1]
+
+ global_ids_this_rank = global_ids[start_idx:end_idx]
+
+ return global_id_seqlens, global_ids_this_rank, offsets, seqlens_gathered
+
+
+def _pack_sequences(
+ samples: List, padded_lengths: torch.Tensor, original_lengths: torch.Tensor, dev: torch.device
+) -> Dict[str, torch.Tensor]:
+ """Pack multiple samples into a single packed sample."""
+
+ def _pack_tensors(tensors):
+ return torch.cat([t.reshape(-1) for t in tensors], dim=0)
+
+ tokens = _pack_tensors([sample["tokens"] for sample in samples])
+ labels = _pack_tensors([sample["labels"] for sample in samples])
+ loss_mask = _pack_tensors([sample["loss_mask"] for sample in samples])
+ position_ids = _pack_tensors([sample["position_ids"] for sample in samples])
+
+ new_sample = {}
+ new_sample["tokens"] = tokens
+ new_sample["labels"] = labels
+ new_sample["loss_mask"] = loss_mask
+ new_sample["position_ids"] = position_ids
+
+ padded_lengths = padded_lengths.to(device=dev, dtype=torch.int32, non_blocking=True).reshape(-1)
+ cu_seqlens_padded = torch.empty(padded_lengths.numel() + 1, device=dev, dtype=torch.int32)
+ cu_seqlens_padded[0] = 0
+ cu_seqlens_padded[1:] = torch.cumsum(padded_lengths, dim=0)
+ max_seqlen = torch.max(padded_lengths).to(dtype=torch.int32)
+
+ new_sample["cu_seqlens_padded"] = cu_seqlens_padded
+ new_sample["max_seqlen"] = max_seqlen
+
+ original_lengths = original_lengths.to(
+ device=dev, dtype=torch.int32, non_blocking=True
+ ).reshape(-1)
+ cu_seqlens = torch.empty(original_lengths.numel() + 1, device=dev, dtype=torch.int32)
+ cu_seqlens[0] = 0
+ cu_seqlens[1:] = torch.cumsum(original_lengths, dim=0).reshape(-1)
+ new_sample["cu_seqlens"] = cu_seqlens
+
+ return new_sample
+
+
+def broadcast_tensor(item, src_rank, group) -> None:
+ """Broadcast a tensor from src_rank to all ranks in the group."""
+ if item is not None:
+ torch.distributed.broadcast(item, src_rank, group=group)
+
+
+def broadcast_to_pp_group(
+ new_samples,
+ num_micro_batches,
+ seqlen_sum_this_global_batch,
+ seqlen_squared_sum_this_global_batch,
+ pp_group,
+ dev,
+):
+ """
+ Broadcast num_micro_batches, seqlen_sum_this_global_batch,
+ seqlen_squared_sum_this_global_batch and metadata to middle PP stages.
+ Before this broadcast, the new_samples on middle PP stages are None,
+ after this broadcast, the new_samples on middle PP stages contain the metadata but
+ without tokens, labels, loss_mask, position_ids.
+ """
+
+ pp_src_rank = torch.distributed.get_process_group_ranks(pp_group)[0]
+
+ if pp_group.size() > 2:
+ if pp_group.rank() == 0:
+ tensor_list = [
+ torch.tensor(
+ [
+ num_micro_batches,
+ seqlen_sum_this_global_batch,
+ seqlen_squared_sum_this_global_batch,
+ ],
+ dtype=torch.float32,
+ ).cuda()
+ ]
+ for sample in new_samples:
+ tensor_list.append(sample["max_seqlen"].unsqueeze(0))
+ for sample in new_samples:
+ tensor_list.append(sample["cu_seqlens"])
+ tensor_list.append(sample["cu_seqlens_padded"])
+ info_to_broadcast = torch.cat(tensor_list, dim=0).to(device=dev, dtype=torch.float32)
+ info_length_tensor = torch.tensor(info_to_broadcast.shape[0], dtype=torch.int32).cuda()
+ broadcast_tensor(info_length_tensor, pp_src_rank, pp_group)
+ broadcast_tensor(info_to_broadcast, pp_src_rank, pp_group)
+ else:
+ info_length_tensor = torch.tensor(0, dtype=torch.int32).cuda()
+ broadcast_tensor(info_length_tensor, pp_src_rank, pp_group)
+ info_to_broadcast = torch.empty(info_length_tensor.item(), dtype=torch.float32).cuda()
+ broadcast_tensor(info_to_broadcast, pp_src_rank, pp_group)
+ if pp_group.rank() != pp_group.size() - 1:
+ # middle PP stages receive the broadcasted info and unpack it
+ info_numpy = info_to_broadcast.cpu().numpy()
+ num_micro_batches = int(info_numpy[0])
+ seqlen_sum_this_global_batch = info_numpy[1]
+ seqlen_squared_sum_this_global_batch = info_numpy[2]
+ max_seqlens = info_to_broadcast[3 : 3 + num_micro_batches]
+ cu_seqlens_list = []
+ cu_seqlens_padded_list = []
+ # cu_seqlens always starts with 0, and the other metadata values
+ # (num_micro_batches, seqlen_sum, seqlen_squared_sum, max_seqlens)
+ # are always positive, so we can use 0 as the delimiter to locate
+ # the start of each cu_seqlens / cu_seqlens_padded tensor.
+ # This avoids an extra broadcast for the lengths of cu_seqlens.
+ indices = np.where(info_numpy == 0)[0]
+ for i in range(num_micro_batches):
+ cu_seqlens_list.append(info_to_broadcast[indices[i * 2] : indices[i * 2 + 1]])
+ if i == num_micro_batches - 1:
+ cu_seqlens_padded_list.append(info_to_broadcast[indices[i * 2 + 1] :])
+ else:
+ cu_seqlens_padded_list.append(
+ info_to_broadcast[indices[i * 2 + 1] : indices[i * 2 + 2]]
+ )
+
+ new_samples = []
+ for i in range(num_micro_batches):
+ new_sample = {}
+ new_sample["max_seqlen"] = max_seqlens[i].to(torch.int32)
+ new_sample["cu_seqlens"] = cu_seqlens_list[i].to(torch.int32)
+ new_sample["cu_seqlens_padded"] = cu_seqlens_padded_list[i].to(torch.int32)
+ new_samples.append(new_sample)
+
+ return (
+ new_samples,
+ num_micro_batches,
+ seqlen_sum_this_global_batch,
+ seqlen_squared_sum_this_global_batch,
+ )
+
+
+def broadcast_scalars(values: List, group, dev, dtype=torch.float32) -> List:
+ """
+ Broadcast scalar values from rank 0 to all ranks in the group.
+
+ Args:
+ values: List of scalar values to broadcast (only used on rank 0).
+ group: The process group to broadcast within.
+ dev: The device to use for the tensor.
+ dtype: The data type for the tensor.
+
+ Returns:
+ List of broadcasted values.
+ """
+ if group.size() <= 1:
+ return values
+
+ src_rank = torch.distributed.get_process_group_ranks(group)[0]
+ num_values = len(values)
+
+ if group.rank() == 0:
+ info_to_broadcast = torch.tensor(values, dtype=dtype, device=dev)
+ else:
+ info_to_broadcast = torch.zeros(num_values, dtype=dtype, device=dev)
+
+ broadcast_tensor(info_to_broadcast, src_rank, group)
+
+ if group.rank() != 0:
+ values = info_to_broadcast.cpu().tolist()
+
+ return values
+
+
+def create_data_iterator(new_samples, tp_group, config, vpp_has_data=None):
+ """Handle virtual pipeline parallelism.
+
+ For VPP, each PP rank needs a list of data iterators (one per VPP stage).
+ VPP stages that originally had a data_iterator (indicated by vpp_has_data)
+ get full samples; others get metadata only (cu_seqlens, cu_seqlens_padded,
+ max_seqlen).
+
+ Args:
+ new_samples: The packed samples after scheduling.
+ tp_group: Tensor parallel process group.
+ config: Model parallel config.
+ vpp_has_data: A list of booleans (one per VPP stage) indicating which
+ VPP stages originally had a data_iterator. None if VPP is disabled.
+ """
+ if (
+ config.virtual_pipeline_model_parallel_size is not None
+ and config.virtual_pipeline_model_parallel_size > 1
+ ):
+ vpp_size = config.virtual_pipeline_model_parallel_size
+ if tp_group.rank() == 0:
+ metadata = [
+ {k: sample[k] for k in ["max_seqlen", "cu_seqlens", "cu_seqlens_padded"]}
+ for sample in new_samples
+ ]
+ new_data_iterator = []
+ for i in range(vpp_size):
+ if vpp_has_data is not None and vpp_has_data[i]:
+ new_data_iterator.append(RerunDataIterator(iter(new_samples)))
+ else:
+ new_data_iterator.append(RerunDataIterator(iter(metadata)))
+ else:
+ new_data_iterator = [None for _ in range(vpp_size)]
+ else:
+ new_data_iterator = RerunDataIterator(iter(new_samples)) if tp_group.rank() == 0 else None
+
+ return new_data_iterator
+
+
+def reroute_samples_to_dcp_ranks(
+ batch,
+ global_ids_this_rank,
+ global_id_seqlens,
+ sample_id_groups,
+ offsets,
+ dp_group,
+ tp_group,
+ dp_cp_group,
+ total_dcp_gpus,
+):
+ """
+ Reroutes the sub-samples to the correct rank after scheduling.
+
+ For each key in the batch dict, we perform an all-to-all communication
+ to transfer the data to the correct ranks.
+ """
+
+ def _gid_to_src_rank(gid: int) -> int:
+ dp_src_rank = torch.bucketize(gid, offsets[1:] - 1)
+ dcp_rank = (
+ torch.distributed.get_process_group_ranks(dp_group)[dp_src_rank] // tp_group.size()
+ ) % dp_cp_group.size()
+ return dcp_rank
+
+ gid2local_id = {int(gid): i for i, gid in enumerate(global_ids_this_rank)}
+ dcp_rank = dp_cp_group.rank()
+ dp_ranks = torch.distributed.get_process_group_ranks(dp_group)
+ dp_ranks = [(r // tp_group.size()) % dp_cp_group.size() for r in dp_ranks]
+
+ data_keys = batch[0].keys()
+
+ # Create the send plan
+ combined_sample_id_groups: List[List[int]] = [[] for _ in range(total_dcp_gpus)]
+ for d in range(total_dcp_gpus):
+ for sample_id_group in sample_id_groups:
+ combined_sample_id_groups[d].extend(sample_id_group[d])
+ for dest_rank in range(total_dcp_gpus):
+ combined_sample_id_groups[dest_rank].sort()
+
+ send_ids_sorted = [
+ gid for d in dp_ranks for gid in combined_sample_id_groups[d] if gid in global_ids_this_rank
+ ]
+
+ send_num_split = [0] * total_dcp_gpus
+ send_lens_split = [0] * total_dcp_gpus
+ for dest_rank in range(total_dcp_gpus):
+ if dest_rank in dp_ranks:
+ send_seq_lens = [
+ global_id_seqlens[gid][1]
+ for gid in combined_sample_id_groups[dest_rank]
+ if gid in global_ids_this_rank
+ ]
+ send_num_split[dest_rank] = len(send_seq_lens)
+ send_lens_split[dest_rank] = sum(send_seq_lens)
+ else:
+ send_lens_split[dest_rank] = 0
+
+ # Create the recv plan
+ recv_sample_id_groups = [[] for _ in range(total_dcp_gpus)]
+ for gid in combined_sample_id_groups[dcp_rank]:
+ src_rank = _gid_to_src_rank(gid)
+ recv_sample_id_groups[src_rank].append(gid)
+
+ recv_lens_split = [0] * total_dcp_gpus
+ for src_rank in range(total_dcp_gpus):
+ recv_lens_split[src_rank] = sum(
+ [global_id_seqlens[gid][1] for gid in recv_sample_id_groups[src_rank]]
+ )
+
+ recv_ids_sorted = [gid for d in range(total_dcp_gpus) for gid in recv_sample_id_groups[d]]
+ recv_counts = [len(recv_sample_id_groups[d]) for d in range(total_dcp_gpus)]
+
+ recv_samples = [{k: None for k in data_keys} for _ in range(sum(recv_counts))]
+
+ def _pack_sample_by_key(key: str) -> torch.Tensor:
+ flattened_tensors = []
+ for gid in send_ids_sorted:
+ t = batch[gid2local_id[gid]][key].to(torch.cuda.current_device(), non_blocking=True)
+ flattened_tensors.append(t.reshape(-1))
+ return (
+ torch.cat(flattened_tensors, dim=0)
+ if flattened_tensors
+ else torch.empty(1, device=torch.cuda.current_device(), dtype=batch[0][key].dtype)
+ )
+
+ def _unpack_sample_by_key(key: str, recv_tensor: torch.Tensor):
+ cursor = 0
+ for i, gid in enumerate(recv_ids_sorted):
+ sample_len = (
+ 1 if key in ["original_seq_len", "padded_seq_len"] else global_id_seqlens[gid][1]
+ )
+ recv_samples[i][key] = recv_tensor[cursor : cursor + sample_len]
+ cursor += sample_len
+
+ for key in data_keys:
+ output_split_sizes, input_split_sizes = (
+ (recv_counts, send_num_split)
+ if key in ["original_seq_len", "padded_seq_len"]
+ else (recv_lens_split, send_lens_split)
+ )
+ send_tensor = _pack_sample_by_key(key)
+ recv_tensor_size = sum(output_split_sizes)
+ recv_tensor = torch.empty(
+ recv_tensor_size, device=torch.cuda.current_device(), dtype=send_tensor.dtype
+ )
+ torch.distributed.all_to_all_single(
+ output=recv_tensor,
+ input=send_tensor,
+ output_split_sizes=output_split_sizes,
+ input_split_sizes=input_split_sizes,
+ group=dp_cp_group,
+ )
+ _unpack_sample_by_key(key, recv_tensor)
+
+ recv_sample_with_id = {recv_id: recv_samples[i] for i, recv_id in enumerate(recv_ids_sorted)}
+ return recv_sample_with_id
+
+
+def build_packed_microbatches(
+ grouped_samples: List[List[Dict[str, torch.Tensor]]], dev: torch.device
+) -> List[Dict[str, torch.Tensor]]:
+ """Build packed samples for each microbatch."""
+ num_micro_batches = len(grouped_samples)
+ seg_starts: List[int] = [0]
+ original_lens_tensors = []
+ padded_lens_tensors = []
+
+ for i in range(num_micro_batches):
+ samples = grouped_samples[i]
+ seg_starts.append(seg_starts[-1] + len(samples))
+ original_lens_tensors.extend([s["original_seq_len"].reshape(-1) for s in samples])
+ padded_lens_tensors.extend([s["padded_seq_len"].reshape(-1) for s in samples])
+
+ padded_lens_all_gpu = torch.cat(padded_lens_tensors, dim=0).to(dtype=torch.int32)
+ original_lens_all_gpu = torch.cat(original_lens_tensors, dim=0).to(dtype=torch.int32)
+
+ new_samples: List[Dict[str, torch.Tensor]] = []
+ for i in range(num_micro_batches):
+ samples = grouped_samples[i]
+ lens_padded = padded_lens_all_gpu[seg_starts[i] : seg_starts[i + 1]]
+ lens_original = original_lens_all_gpu[seg_starts[i] : seg_starts[i + 1]]
+ new_sample = _pack_sequences(samples, lens_padded, lens_original, dev)
+ new_samples.append(new_sample)
+
+ return new_samples
+
+
+def get_batch_and_global_seqlens(data_iterator, num_microbatches, dp_group):
+ """
+ Get the batch and global sequence lengths.
+ Each DP rank loads the same number of sequences, so we need to gather the sequence
+ lengths from all ranks then we can schedule the sequences into groups.
+ Args:
+ data_iterator: The data iterator.
+ num_microbatches: The number of microbatches.
+ dp_group: The data parallel group.
+
+ Returns:
+ batch: The batch.
+ global_id_seqlens: The global sequence lengths.
+ global_ids_this_rank: The global IDs locally present on this rank.
+ """
+
+ batch_list = [next(data_iterator) for _ in range(num_microbatches)]
+
+ batch = []
+ for item in batch_list:
+ if isinstance(item, dict):
+ batch.append(item)
+ elif isinstance(item, list):
+ batch.extend(item)
+ else:
+ raise ValueError(f"Invalid item type: {type(item)}")
+
+ # in sft_dataset.py, sequences are already packed before rescheduling,
+ # so we need to unpack them here and repack after rescheduling.
+ # This is only to adapt to the current megatron-lm sft_dataset.
+ # If you implement your own dataset, just have __getitem__ return List[Dict]
+ # and this step can be skipped.
+ batch = _unpack_batch(batch)
+
+ subsample_seqlens = torch.cat([sample["padded_seq_len"] for sample in batch]).to(
+ dtype=torch.int32, device=torch.cuda.current_device()
+ )
+
+ global_id_seqlens, global_ids_this_rank, offsets, seqlens_gathered = (
+ _get_global_seqlens_and_ids(subsample_seqlens, dp_group)
+ )
+
+ return batch, global_id_seqlens, global_ids_this_rank, offsets, seqlens_gathered
diff --git a/megatron/core/datasets/gpt_dataset.py b/megatron/core/datasets/gpt_dataset.py
index 42146d1acd2..418a02719df 100644
--- a/megatron/core/datasets/gpt_dataset.py
+++ b/megatron/core/datasets/gpt_dataset.py
@@ -58,8 +58,8 @@ class GPTDatasetConfig(BlendedMegatronDatasetConfig):
Set to 0 if sequence parallel is not enabled regardless of TP size.
"""
- hybrid_context_parallel: bool = False
- """Option to enable hybrid context parallelism. When setting this to True,
+ dynamic_context_parallel: bool = False
+ """Option to enable dynamic context parallelism. When setting this to True,
each sample should be divisible by the data parallel size * context parallel size * 2.
If sequence parallel is enabled, it should be divisible by the
data parallel size * context parallel size * sequence parallel size * 2.
@@ -76,6 +76,9 @@ class GPTDatasetConfig(BlendedMegatronDatasetConfig):
context_parallel_size: Optional[int] = None
"""The size of the context parallel group. Needed for padding in packed sequences."""
+ sft_mock_dataset_config_json: Optional[str] = None
+ """This config provides the necessary information for the mock dataset."""
+
def __post_init__(self) -> None:
"""Do asserts and set fields post init"""
super().__post_init__()
diff --git a/megatron/core/datasets/readme.md b/megatron/core/datasets/readme.md
index 452bf24e4a2..a61c623d960 100644
--- a/megatron/core/datasets/readme.md
+++ b/megatron/core/datasets/readme.md
@@ -192,6 +192,68 @@ To query the `BlendedDataset` for the _k_-th sample we do the following
To save time during initialization, each index is built/cached sequentially on one process rank and subsequently loaded in parallel on other process ranks. The cached indices are unique to a hash generated in the `BlendedDataset.__init__` function.
+## Packing Scheduler
+
+The packing scheduler re-schedules variable-length sequences across DP×CP ranks to improve GPU utilization. It is built around two modules: `data_schedule.py` (high-level logic and entry points) and `data_schedule_utils.py` (utility functions).
+
+### Call Hierarchy
+
+The scheduling pipeline has two phases connected by the data iterator: `wrap_data_iterator` consumes the **original** data iterator, performs global-batch scheduling, and produces a **wrapped** (packed) data iterator; `get_batch_on_this_rank_for_sequence_packing` then consumes this **wrapped** data iterator to fetch individual packed microbatches during training.
+
+```
+                  original                                     wrapped (packed)
+               data_iterator                                    data_iterator
+                     │                                                │
+                     ▼                                                ▼
+              ┌──────────────────────────┐             ┌──────────────────────────────────────┐
+              │   wrap_data_iterator()   │             │   get_batch_on_this_rank_for_        │
+ Phase 1      │   (once per global       │ ──────────► │   sequence_packing()                 │  Phase 2
+ (scheduling) │   batch)                 │   returns   │   (once per microbatch,              │  (fetching)
+              │                          │   wrapped   │   called by training loop)           │
+              └────────────┬─────────────┘   iterator  └───────────────┬──────────────────────┘
+                           │                                           │
+                           ▼                                           ▼
+              DpBalancedScheduler.run()                   next(wrapped_data_iterator)
+              │                                           ├─ get_thd_partitioned_indices() [TE]
+              ├─ get_batch_and_global_seqlens() [utils]   ├─ broadcast_tensor() [utils]
+              ├─ get_groups_and_subsamples()              └─ PackedSeqParams(...)
+              ├─ reroute_samples_to_dcp_ranks() [utils]
+              ├─ build_packed_microbatches() [utils]
+              ├─ broadcast_to_pp_group() [utils]
+              ├─ broadcast_scalars() [utils]
+              └─ create_data_iterator() [utils]
+```
+
+### `data_schedule.py`
+
+#### Entry Points
+
+- **`wrap_data_iterator(original_data_iterator) → wrapped_data_iterator`** — Top-level entry point called once per global batch. Takes the **original** data iterator as input, resolves the scheduler class from `scheduler_map`, instantiates it, and delegates to `scheduler.run()` which consumes all microbatches from the original iterator, re-schedules them, and produces a **wrapped** (packed) data iterator along with the updated `num_microbatches` and FLOPs statistics.
+
+- **`get_batch_on_this_rank_for_sequence_packing(wrapped_data_iterator)`** — Per-microbatch entry point called by the training loop. Takes the **wrapped** data iterator returned by `wrap_data_iterator` as input. Fetches one packed microbatch via `next(wrapped_data_iterator)`, broadcasts batch fields across TP ranks, optionally partitions sequences across CP ranks using Transformer Engine's `thd_get_partitioned_indices`, and constructs `PackedSeqParams` (with `cu_seqlens`, `max_seqlen`, `qkv_format=thd`).
+
+#### Scheduler Classes
+
+- **`BasePackingScheduler`** — Abstract base class. Defines the interface:
+  - `get_groups_and_subsamples()` — pure scheduling algorithm (must be overridden).
+  - `run()` — full pipeline: fetch → schedule → reroute → pack → broadcast → VPP handling.
+
+- **`DpBalancedScheduler(BasePackingScheduler)`** — Concrete scheduler that packs sequences in their original order until reaching `max_seqlen_per_dp_cp_rank × cp_size`. Aligns the number of microbatches to `dp_size` (and VPP stage multiples when applicable).
+
+### `data_schedule_utils.py`
+
+Utility functions consumed by the schedulers above:
+
+| Function | Role |
+|---|---|
+| `get_batch_and_global_seqlens()` | Fetch `num_microbatches` batches from the data iterator and all-gather sequence lengths across DP ranks. |
+| `reroute_samples_to_dcp_ranks()` | All-to-all communication to transfer sub-samples to their scheduled DP×CP rank. |
+| `build_packed_microbatches()` | Concatenate sub-samples within each microbatch group and produce `cu_seqlens`. |
+| `broadcast_to_pp_group()` | Broadcast packed samples and metadata from the first/last PP stage to middle stages. |
+| `broadcast_scalars()` | Broadcast scalar values (e.g. `num_microbatches`, FLOPs stats) across a process group. |
+| `broadcast_tensor()` | Broadcast a single tensor within a process group. |
+| `create_data_iterator()` | Wrap packed sample lists into a data iterator; handles VPP stage splitting. |
+
## Fast DataLoader initialization
Especially for large-scale runs, DataLoader initialization can take several minutes, since it involves opening and memory-mapping multiple files and can significantly stress the filesystem. To speed up this process, we have developed the following three optimizations, controlled by configuration flags":
diff --git a/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py b/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py
index 8993620c779..40f07d3df72 100644
--- a/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py
+++ b/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py
@@ -80,6 +80,8 @@ def __init__(
if has_config_logger_enabled(config):
log_config_to_disk(config, locals(), prefix=type(self).__name__)
+ self.num_moe_experts = getattr(config, "num_moe_experts", None)
+
self.ddp_config = ddp_config
log_single_rank(
logger,
@@ -284,7 +286,7 @@ def _init_dist_index(self, pg_collection):
expt_tp_group = single_rank_group
if enable_hsdp:
- if expt_dp_group is not None:
+ if self.num_moe_experts is not None:
expt_mesh = _get_hsdp_tp_mesh(
outer_fsdp_group, expt_dp_group, expt_tp_group, ep_size=ep_group.size()
)
@@ -313,7 +315,7 @@ def _init_dist_index(self, pg_collection):
expt_device_mesh=expt_device_mesh,
)
else:
- if ep_group is not None:
+ if self.num_moe_experts is not None:
expt_mesh = _get_dp_tp_mesh(expt_dp_group, expt_tp_group, ep_size=ep_group.size())
expt_device_mesh = DeviceMesh.from_group(
[expt_dp_group, expt_tp_group],
diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py
index f8640446814..bdc98bebf3c 100644
--- a/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py
+++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py
@@ -73,6 +73,34 @@ class TrainingState(Enum):
IDLE = auto()
+def setup_delayed_wgrad_acc_hook(module, grad_acc_func):
+ """Configure delayed wgrad gradient processing for MoE expert parameters.
+
+ When ``overlap_dispatch_backward_with_experts_wgrad`` is enabled on a TransformerLayer,
+ this function:
+ 1. Marks expert parameters so the normal post-accumulate-grad hook is skipped.
+ 2. Registers a callback on the MoE layer that invokes FSDP's gradient
+ reduce-scatter after the delayed wgrad computation completes.
+
+ Args:
+ module: The module being processed in the forward pre-hook. Only
+ ``TransformerLayer`` instances with the delayed wgrad config flag
+ enabled are affected; all other modules are no-ops.
+ grad_acc_func: The FSDP gradient processing function
+ (``_process_post_backward_gradients``) to be called after the delayed
+ wgrad computation finishes.
+ """
+ from functools import partial
+
+ need_backward_dw = getattr(module, "need_backward_dw", lambda: False)
+ if not need_backward_dw():
+ return
+
+ for param in module.parameters():
+ if getattr(param, 'skip_backward_post_hook', False):
+ param.post_wgrad_grad_acc_hook = partial(grad_acc_func, [param])
+
+
class MegatronFSDP(torch.nn.Module):
"""Fully Sharded Data Parallel training.
@@ -662,6 +690,23 @@ def _process_post_backward_gradients(param_list):
"""
# Filter out shared parameters whose gradients are handled by the root hook.
param_list = [p for p in param_list if not getattr(p, "_is_shared", False)]
+
+ # Filter out parameters whose gradient processing is deferred to a delayed
+ # wgrad accumulation hook (post_wgrad_grad_acc_hook). If skip_backward_post_hook
+ # is set but the delayed hook was never installed, process the parameter
+ # immediately as a safety fallback to avoid silently dropping gradients.
+ param_list = [
+ p
+ for p in param_list
+ if not (
+ getattr(p, 'skip_backward_post_hook', False)
+ and hasattr(p, 'post_wgrad_grad_acc_hook')
+ )
+ ]
+
+ if not param_list:
+ return
+
for param in param_list:
_grad_acc(param)
@@ -728,6 +773,7 @@ def _pre_forward_param_unshard(
prefetch=fsdp_forward_prefetch,
prefetch_order=PrefetchOrder.FORWARD_PASS_ORDER,
)
+
return args, kwargs
@torch.compiler.disable
@@ -983,6 +1029,8 @@ def _register_pre_backward_param_unshard_hook(module):
fsdp_modules = []
for name, module in root_module.named_modules():
+ # Set post backward hook for TE grouped gemm if enabled comm overlap
+ setup_delayed_wgrad_acc_hook(module, _process_post_backward_gradients)
if self.enable_fine_grained_param_gather_hook:
_register_pre_forward_param_unshard_hook(module)
_register_pre_backward_param_unshard_hook(module)
diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/mixed_precision.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/mixed_precision.py
index 89c67f40d41..76e32b376f0 100644
--- a/megatron/core/distributed/fsdp/src/megatron_fsdp/mixed_precision.py
+++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/mixed_precision.py
@@ -58,7 +58,6 @@
QUANTIZED_MODEL_INIT_CLASS = fp8_model_init
else:
QUANTIZED_MODEL_INIT_CLASS = nullcontext
-
# Detect the FP8 tensor class
try:
from transformer_engine.pytorch.tensor import QuantizedTensor
diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py
index 684cd7a99eb..b4e3d9becfb 100644
--- a/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py
+++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py
@@ -2632,6 +2632,13 @@ def _reset_parameters(self, old_params, new_params):
if getattr(old_param, tp_attr, None) is not None:
setattr(new_param, tp_attr, getattr(old_param, tp_attr))
+ # For FSDP with delayed_wgrad_compute, `skip_backward_post_hook` needs
+ # to be reset on new param for correct grad accumulation of wgrad computation.
+ setattr(
+ new_param,
+ 'skip_backward_post_hook',
+ getattr(old_param, 'skip_backward_post_hook', False),
+ )
for item_id, p in enumerate(self.params):
if p in param_map:
new_p = param_map[p]
@@ -2992,7 +2999,6 @@ def _batch_quantize_blockwise_fp8_params(
if is_blockwise_float8tensor(param):
fp8_params.append(param)
if model_param.numel() == 0:
- # Empty parameter.
shard_fp32_from_fp8.append(None)
shard_offsets_in_fp8.append(None)
shard_model_params.append([None, None])
diff --git a/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py b/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py
index b961a449d3e..6dd5ab6d342 100644
--- a/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py
+++ b/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py
@@ -514,10 +514,6 @@ def __init__(
self.hsdp_outer_dp_shard = hsdp_outer_dp_shard
self.expt_device_mesh = expt_device_mesh
- # Handling the situation where M-Core MoE EP=1
- if self.expt_device_mesh is None:
- self.expt_device_mesh = device_mesh
-
# Hybrid FSDP Process Groups
# Retrieve the FSDP process group from the DeviceMesh.
self.fsdp_group = (
diff --git a/megatron/core/distributed/param_and_grad_buffer.py b/megatron/core/distributed/param_and_grad_buffer.py
index 278a15d1a7c..5f975492806 100644
--- a/megatron/core/distributed/param_and_grad_buffer.py
+++ b/megatron/core/distributed/param_and_grad_buffer.py
@@ -238,6 +238,9 @@ def __init__(
# or bucket.grad_data.
self.cached_param_buffer_shard_list = [None] * len(self.buckets)
self.cached_grad_buffer_shard_list = [None] * len(self.buckets)
+ # Track grad mode used to create cached param views. Rebuild if mode changes to avoid
+ # mixing no_grad-created views with in-place updates in grad-enabled mode.
+ self._cached_param_buffer_shards_grad_enabled = None
def reset(self):
"""
@@ -394,6 +397,7 @@ def start_param_sync(self, force_sync: bool = False):
bucket.layerwise_gather_list = None
bucket._layerwise_src_buffer = None
self.param_gather_handle = None
+
else:
# Standard distributed optimizer path: use _coalescing_manager.
# all_gather_into_tensor writes directly into a contiguous output buffer and
diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py
index 17358f8a921..4d46f1240f5 100644
--- a/megatron/core/extensions/transformer_engine.py
+++ b/megatron/core/extensions/transformer_engine.py
@@ -1559,21 +1559,20 @@ def forward(
"""Forward."""
if packed_seq_params is not None:
# If Dynamic CP group is provided, update TE DPA CP group
- if packed_seq_params.cp_group is not None:
- self.cp_group = packed_seq_params.cp_group
- super().set_context_parallel_group(
- self.cp_group,
- torch.distributed.get_process_group_ranks(self.cp_group),
- TEDotProductAttention.cp_stream,
- self.cp_comm_type,
- )
- # If cp_group is None but local_cp_size is provided,
- # Indicates to turn off CP dynamically
- elif packed_seq_params.local_cp_size is not None:
- assert (
- packed_seq_params.local_cp_size == 1
- ), "local_cp_size must be == 1 if provided without cp_group"
- super().set_context_parallel_group(None, None, None, self.cp_comm_type)
+ if packed_seq_params.local_cp_size is not None:
+ if packed_seq_params.local_cp_size == 1:
+ super().set_context_parallel_group(None, None, None, self.cp_comm_type)
+ else:
+ assert (
+ packed_seq_params.cp_group is not None
+ ), "cp_group is not set in packed_seq_params for dynamic CP"
+ self.cp_group = packed_seq_params.cp_group
+ super().set_context_parallel_group(
+ self.cp_group,
+ torch.distributed.get_process_group_ranks(self.cp_group),
+ TEDotProductAttention.cp_stream,
+ self.cp_comm_type,
+ )
self.kept_packed_seq_params.discard("cp_group")
self.kept_packed_seq_params.discard("local_cp_size")
@@ -1707,10 +1706,14 @@ def __init__(
self.disable_parameter_transpose_cache = self.config.disable_parameter_transpose_cache
extra_kwargs = _get_extra_te_kwargs(config)
+ self.delay_wgrad_compute = (
+ self.config.delay_wgrad_compute
+ or self.config.overlap_dispatch_backward_with_experts_wgrad
+ )
- if self.config.delay_wgrad_compute:
+ if self.delay_wgrad_compute:
if is_te_min_version("2.3.0"):
- extra_kwargs["delay_wgrad_compute"] = self.config.delay_wgrad_compute
+ extra_kwargs["delay_wgrad_compute"] = True
else:
raise RuntimeError(
"Only TE with version >=2.3.0 supports delay_wgrad_compute now."
@@ -1772,6 +1775,50 @@ def __init__(
for param in self.parameters():
setattr(param, "allreduce", not (is_expert and self.expert_parallel))
+ def normalize_grouped_parameter_keys(
+ self,
+ state_dict,
+ prefix,
+ local_metadata,
+ strict,
+ missing_keys,
+ unexpected_keys,
+ error_msgs,
+ ):
+ """Make grouped checkpoint keys compatible across parameter layouts."""
+
+ def maybe_remap_param(param_name: str, single_grouped: bool) -> None:
+ grouped_key = f"{prefix}{param_name}"
+ indexed_keys = [
+ f"{prefix}{param_name}{gemm_idx}" for gemm_idx in range(self.num_gemms)
+ ]
+ has_grouped_key = grouped_key in state_dict
+ has_any_indexed_key = any(key in state_dict for key in indexed_keys)
+ has_all_indexed_keys = all(key in state_dict for key in indexed_keys)
+
+ if single_grouped:
+ if has_grouped_key or not has_all_indexed_keys:
+ return
+ state_dict[grouped_key] = torch.stack(
+ [state_dict.pop(key) for key in indexed_keys], dim=0
+ )
+ else:
+ if has_any_indexed_key or not has_grouped_key:
+ return
+ split_tensors = self._split_grouped_checkpoint_tensor(
+ state_dict.pop(grouped_key), grouped_key
+ )
+ for gemm_idx, tensor in enumerate(split_tensors):
+ state_dict[f"{prefix}{param_name}{gemm_idx}"] = tensor
+
+ maybe_remap_param("weight", getattr(self, "single_grouped_weight", False))
+ if self.use_bias:
+ maybe_remap_param("bias", getattr(self, "single_grouped_bias", False))
+
+ self._register_load_state_dict_pre_hook(
+ normalize_grouped_parameter_keys, with_module=True
+ )
+
# Explicitly stamp partition_dim and partition_stride on expert weight
 # tensors when explicit_expert_comm cleared parallel_mode. TE ≤2.12
 # set these internally; TE ≥2.13 no longer does (parallel_mode=None
@@ -1877,6 +1924,31 @@ def merge_extra_states(
self._register_load_state_dict_pre_hook(merge_extra_states, with_module=True)
+ def _split_grouped_checkpoint_tensor(
+ self, tensor: torch.Tensor, checkpoint_key: str
+ ) -> list[torch.Tensor]:
+ """Split grouped checkpoint tensor into one tensor per GEMM."""
+ if hasattr(tensor, "split_into_quantized_tensors") and callable(
+ tensor.split_into_quantized_tensors
+ ):
+ grouped_tensors = getattr(tensor, "quantized_tensors", None)
+ if grouped_tensors is None:
+ grouped_tensors = tensor.split_into_quantized_tensors()
+ if len(grouped_tensors) != self.num_gemms:
+ raise RuntimeError(
+ f"Grouped checkpoint tensor {checkpoint_key} has {len(grouped_tensors)} "
+ f"groups, expected {self.num_gemms}."
+ )
+ return list(grouped_tensors)
+ if tensor.ndim > 0 and tensor.shape[0] == self.num_gemms:
+ return list(tensor.unbind(dim=0))
+ if tensor.ndim > 0 and tensor.shape[0] % self.num_gemms == 0:
+ return list(torch.chunk(tensor, self.num_gemms, dim=0))
+ raise RuntimeError(
+ f"Cannot split checkpoint tensor {checkpoint_key} with shape {tuple(tensor.shape)} "
+ f"into {self.num_gemms} GEMM shards."
+ )
+
def finish_init(self, quantization_config: QuantizationConfig):
"""Post-init of quantization override"""
if quantization_config is None:
@@ -1981,6 +2053,21 @@ def _sharded_state_dict_grouped(
singleton_local_shards = (metadata or {}).get('singleton_local_shards', False)
sharded_state_dict = {}
full_state_dict = self.state_dict(prefix="", keep_vars=True)
+ grouped_split_cache = {}
+
+ def get_gemm_tensor(param_name: str, gemm_idx: int) -> torch.Tensor:
+ indexed_name = f"{param_name}{gemm_idx}"
+ if indexed_name in full_state_dict:
+ return full_state_dict[indexed_name]
+ if param_name not in full_state_dict:
+ raise KeyError(indexed_name)
+ if param_name not in grouped_split_cache:
+ grouped_split_cache[param_name] = self._split_grouped_checkpoint_tensor(
+ full_state_dict[param_name], param_name
+ )
+ grouped_splits = grouped_split_cache[param_name]
+ return grouped_splits[gemm_idx]
+
num_global_experts = get_pg_size(self._pg_collection.ep) * self.num_gemms
local_expert_indices_offset = get_pg_rank(self._pg_collection.ep) * self.num_gemms
ep_axis = len(sharded_offsets)
@@ -1988,11 +2075,11 @@ def _sharded_state_dict_grouped(
for gemm_idx in range(self.num_gemms):
global_expert_idx = local_expert_indices_offset + gemm_idx
state_dict = {
- f"{gemm_idx}.weight": full_state_dict[f"weight{gemm_idx}"],
+ f"{gemm_idx}.weight": get_gemm_tensor("weight", gemm_idx),
f"{gemm_idx}._extra_state": extra_states[gemm_idx],
}
if self.use_bias:
- state_dict[f"{gemm_idx}.bias"] = full_state_dict[f"bias{gemm_idx}"]
+ state_dict[f"{gemm_idx}.bias"] = get_gemm_tensor("bias", gemm_idx)
if singleton_local_shards:
expert_prefix = f"{global_expert_idx}.{prefix}"
new_sharded_offsets = sharded_offsets
@@ -2040,7 +2127,7 @@ def backward_dw(self):
Compute weight gradients during the backward pass
if delay_wgrad_compute is enabled.
"""
- if self.config.delay_wgrad_compute:
+ if self.delay_wgrad_compute:
super().backward_dw()
class TEColumnParallelGroupedLinear(TEGroupedLinear):
@@ -2806,3 +2893,24 @@ def set_save_original_input(module):
from transformer_engine.pytorch.float8_tensor import Float8Tensor
except ImportError:
Float8Tensor = None
+
+
+def get_thd_partitioned_indices(cu_seqlens, total_tokens, cp_size, cp_rank):
+ """Get partitioned indices for THD format data in context parallel.
+
+ Args:
+ cu_seqlens: Cumulative sequence lengths tensor.
+ total_tokens: Total number of tokens.
+ cp_size: Context parallel world size.
+ cp_rank: Context parallel rank.
+
+ Returns:
+ Partitioned indices tensor.
+ """
+ assert is_te_min_version("1.10.0"), (
+ "Please update Transformer Engine to >= 1.10 to use "
+ "Context Parallel with THD format data"
+ )
+ import transformer_engine_torch as tex
+
+ return tex.thd_get_partitioned_indices(cu_seqlens, total_tokens, cp_size, cp_rank)
diff --git a/megatron/core/extensions/transformer_engine_spec_provider.py b/megatron/core/extensions/transformer_engine_spec_provider.py
index 04228e02e88..c365fb4835d 100644
--- a/megatron/core/extensions/transformer_engine_spec_provider.py
+++ b/megatron/core/extensions/transformer_engine_spec_provider.py
@@ -19,6 +19,7 @@
from megatron.core.fusions.fused_layer_norm import FusedLayerNorm
from megatron.core.models.backends import BackendSpecProvider
from megatron.core.tensor_parallel.layers import ColumnParallelLinear, RowParallelLinear
+from megatron.core.transformer.dot_product_attention import DotProductAttention
from megatron.core.transformer.mlp import MLPSubmodules, TEActivationFunctionBuilder
from megatron.core.transformer.moe.experts import GroupedMLPSubmodules, SequentialMLP, TEGroupedMLP
from megatron.core.transformer.moe.moe_layer import ExpertsBuilder
@@ -36,6 +37,10 @@ def __new__(cls, *args, **kwargs):
class TESpecProvider(BackendSpecProvider):
"""A protocol for providing the submodules used in Spec building."""
+ def __init__(self, fallback_to_eager_attn: bool = False):
+ super().__init__()
+ self.fallback_to_eager_attn = fallback_to_eager_attn
+
def linear(self) -> type:
"""Which linear module TE backend uses"""
return TELinear
@@ -70,6 +75,8 @@ def layer_norm(
def core_attention(self) -> type:
"""Which module to use for attention"""
+ if self.fallback_to_eager_attn:
+ return DotProductAttention
return TEDotProductAttention
def grouped_mlp_modules(self, moe_use_grouped_gemm: bool) -> ExpertsBuilder:
diff --git a/megatron/core/fusions/fused_bias_dropout.py b/megatron/core/fusions/fused_bias_dropout.py
index 2eb4007f75c..1f2448d86be 100644
--- a/megatron/core/fusions/fused_bias_dropout.py
+++ b/megatron/core/fusions/fused_bias_dropout.py
@@ -1,10 +1,13 @@
-# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
-from typing import Optional, Tuple
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+from typing import TYPE_CHECKING, Optional, Tuple
import torch
from megatron.core.jit import jit_fuser
+if TYPE_CHECKING:
+ from megatron.core.tensor_parallel.random import CheckpointManager
+
# pylint: disable=missing-function-docstring
@@ -80,7 +83,26 @@ def bias_dropout_add_fused_inference(
return _bias_dropout_add_func(x_with_bias, residual, prob, False)
-def get_bias_dropout_add(training, fused):
+def get_bias_dropout_add(
+ training, fused, mhc_recompute_manager: Optional['CheckpointManager'] = None
+):
+ """
+ Get the bias-dropout-add function.
+
+ Args:
+ training: Whether in training mode.
+ fused: Whether to use fused implementation.
+ mhc_recompute_manager: Optional CheckpointManager for checkpoint management.
+ When provided, the returned function will wrap the BDA operation with
+ CheckpointWithoutOutput for memory-efficient recomputation.
+
+ Returns:
+ A callable that performs bias-dropout-add operation.
+ """
+ if mhc_recompute_manager is not None:
+ # Return a checkpointed version that handles tuple unpacking internally
+ return _get_checkpointed_bda(training, fused, mhc_recompute_manager)
+
if fused:
# jit scripting for a nn.module (with dropout) is not
# triggering the fusion kernel. For now, we use two
@@ -92,3 +114,68 @@ def get_bias_dropout_add(training, fused):
return bias_dropout_add_fused_inference
else:
return bias_dropout_add_unfused(training)
+
+
+def _get_checkpointed_bda(training, fused, mhc_recompute_manager: 'CheckpointManager'):
+ """
+ Create a checkpointed bias-dropout-add function.
+
+ This function handles:
+ 1. Tuple unpacking for x_with_bias (required because save_for_backward can't save tuples)
+ 2. Non-tensor arguments like dropout probability (handled by CheckpointWithoutOutput)
+ 3. Auto-registration to the CheckpointManager
+
+ Args:
+ training: Whether in training mode.
+ fused: Whether to use fused implementation.
+ mhc_recompute_manager: CheckpointManager for checkpoint management.
+
+ Returns:
+ A callable that performs checkpointed bias-dropout-add operation.
+ """
+ from megatron.core.tensor_parallel.random import CheckpointWithoutOutput
+
+ # Get the underlying BDA function
+ if fused:
+ if training:
+ bda_func = bias_dropout_add_fused_train
+ else:
+ bda_func = bias_dropout_add_fused_inference
+ else:
+ bda_func = bias_dropout_add_unfused(training)
+
+ def _checkpointed_bda(x_with_bias, residual, prob):
+ """
+ Checkpointed BDA that handles tuple unpacking internally.
+
+ Args:
+ x_with_bias: Either a tuple (x, bias) or a single tensor x.
+ residual: Residual tensor.
+ prob: Dropout probability.
+
+ Returns:
+ Output tensor after bias-dropout-add.
+ """
+ # Create checkpoint with manager
+ ckpt = CheckpointWithoutOutput(ckpt_manager=mhc_recompute_manager)
+
+ # Handle case where x_with_bias might be a single tensor (e.g., from IdentityOp)
+ if isinstance(x_with_bias, tuple):
+ x, bias = x_with_bias
+ else:
+ x = x_with_bias
+ bias = None
+
+ # Wrapper function that re-packs the tuple for the actual BDA function
+ def _bda_wrapper(output, bias, res, dropout):
+ return bda_func((output, bias), res, dropout)
+
+ # Call checkpoint with unpacked arguments
+ result = ckpt.checkpoint(_bda_wrapper, x, bias, residual, prob)
+
+ # No-op when manager is set - manager handles all discarding uniformly
+ ckpt.discard_output_and_register_recompute(result)
+
+ return result
+
+ return _checkpointed_bda
diff --git a/megatron/core/fusions/fused_linear_cross_entropy.py b/megatron/core/fusions/fused_linear_cross_entropy.py
new file mode 100644
index 00000000000..b533fef7aa3
--- /dev/null
+++ b/megatron/core/fusions/fused_linear_cross_entropy.py
@@ -0,0 +1,242 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+
+"""
+Linear Cross Entropy API
+Fuse cross entropy with linear layer.
+"""
+
+import typing
+from functools import lru_cache
+
+import torch
+
+
class Platform:
    """
    Singleton holding the architecture-specific fused kernel entry points.

    The first instantiation probes the current CUDA device's compute
    capability and binds ``forward_func`` / ``backward_func`` to the matching
    GPU implementation; every later ``Platform()`` call returns the same
    fully-initialized instance.
    """

    _instance: typing.Optional["Platform"] = None

    def __new__(cls) -> "Platform":
        # Classic singleton: allocate exactly one shared instance.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self) -> None:
        # __init__ runs on every Platform() call; skip after first success.
        if getattr(self, "_initialized", False):
            return

        assert torch.cuda.is_available(), "CUDA is not available"
        capability = torch.cuda.get_device_capability(torch.cuda.current_device())
        major = capability[0]

        if major == 10:
            # Blackwell (SM 10.x) is currently the only supported target.
            from .linear_cross_entropy.blackwell import entry as gpu_entry

            self.forward_func: typing.Callable[..., typing.Any] = gpu_entry.forward
            self.backward_func: typing.Callable[..., typing.Any] = gpu_entry.backward
        else:
            raise ValueError(f"Unsupported architecture: {major}")

        self._initialized = True
+
+
@lru_cache(maxsize=1)
def _get_platform() -> Platform:
    """
    Helper function to lazy initialize the platform.

    The lru_cache(maxsize=1) memoizes the (already singleton) Platform so
    repeated calls skip Platform.__new__/__init__ entirely; construction —
    and its CUDA availability assert — happens on first use, not at import.
    """
    return Platform()
+
+
class LinearCrossEntropy(torch.autograd.Function):
    """
    This class implements a custom autograd function for linear and cross entropy,
    whose equivalent logic in PyTorch is:
    ```python
    def torch_entropy(hidden, weight, labels):
        logits = torch.matmul(hidden, weight)
        logprobs = torch.nn.functional.cross_entropy(logits, labels)
        return logprobs
    ```
    """

    @staticmethod
    def forward(
        ctx,
        hidden: torch.Tensor,
        weight: torch.Tensor,
        labels: torch.Tensor,
        tp_group: typing.Optional[torch.distributed.ProcessGroup] = None,
        reduction: typing.Literal["none", "sum", "mean"] = "mean",
        ignore_index: int = -100,
        sequence_parallel: bool = False,
    ) -> torch.Tensor:
        """
        The forward pass of the Linear Cross Entropy.
        If tp_group is not None, the weight tensor to each TP rank should be
        (global_vocab_size // world_size, dim).
        Note that each of the ranks should get equal shards along the vocab_size dimension.

        Args:
            @param hidden: the input tensor with shape (num_tokens, dim)
            @param weight: the lm_head weight tensor with shape (local_vocab_size, dim)
            @param labels: the labels tensor with shape (num_tokens,)
            @param tp_group: the distributed process group for TP.
            @param reduction: Default to "mean", and can be one of "none", "sum", "mean".
            @param ignore_index: The index to ignore. Default to -100.
            @param sequence_parallel: Whether to use sequence parallel. Default to False.
        Returns:
            @return: logprobs with shape
                - either (num_tokens,) when reduction is "none"
                - or (1,) when reduction is "mean" or "sum"

        tp_group is None ----------------------------------> DP
                B
            A       C
        tp_group is not None & sequence_parallel is False -> TP
                B0  B1
            A       C0  C1
        tp_group is not None & sequence_parallel is True --> SP
                B0  B1
            A0      C0  XX
            A1      XX  C1

        When tp_group is not None, the weight tensor will be split along the vocab_size
        dimension, which means each rank will get equal shards along the global_vocab_size
        dimension. Specifically, the weight tensor to each rank will be (local_vocab_size, dim).
        And there is an assumption that each rank will get the same local_vocab_size.

        When sequence_parallel is True, the hidden tensor will be split along the
        sequence length dimension, which means each rank will get equal shards along
        the sequence length dimension. Specifically, the hidden tensor to each rank
        will be (local_num_tokens, dim). And there is an assumption that each rank
        will get the same local_num_tokens.

        In TP forward pass, the hidden tensor and label tensor shall be identical
        among all TP ranks, and it's user's responsibility to ensure the hidden tensor
        is identical among all TP ranks. Then this operation will produce identical
        logprobs among all TP ranks.

        In TP backward pass, the gradient of the logprobs shall be identical among all
        TP ranks, and it's user's responsibility to ensure the gradient of the logprobs
        is identical among all TP ranks. Then this operation will produce distinct gradients
        for the local weight tensor, and identical gradients for the hidden tensor.

        ```python
        # ------------ forward pass ------------ #
        hidden = tp_group.broadcast(hidden, src=0)  # handled by framework
        labels = tp_group.broadcast(labels, src=0)  # handled by framework
        logprobs = linear_cross_entropy(...)
        # each rank will get the same logprobs

        # ------------ backward pass ------------ #
        g_logprobs = tp_group.broadcast(g_logprobs, src=0)  # handled by framework
        d_hidden, d_weight = torch.autograd.grad(...)
        # each rank will get the same d_hidden,
        # and distinct d_weight for local weight shard
        ```

        In SP forward pass, the hidden tensor shall be split along the sequence length dimension,
        and the label tensor shall be identical among all TP ranks.
        Then this operation will produce identical logprobs among all TP ranks.

        In SP backward pass, the gradient of the logprobs shall be identical among all TP ranks,
        Then this operation will produce distinct gradients for the local hidden tensor
        and local weight tensor.
        ```python
        # ------------ forward pass ------------ #
        hidden = global_hidden[tp_rank]  # handled by framework
        labels = tp_group.broadcast(labels, src=0)  # handled by framework
        logprobs = linear_cross_entropy(...)
        # each rank will get the same logprobs

        # ------------ backward pass ------------ #
        g_logprobs = tp_group.broadcast(g_logprobs, src=0)  # handled by framework
        d_hidden, d_weight = torch.autograd.grad(...)
        # each rank will get distinct local d_hidden and d_weight
        ```
        """
        with torch.cuda.nvtx.range("LinearCrossEntropy-forward"):
            # Dispatch to the architecture-specific fused kernel resolved by
            # the Platform singleton.
            (
                logprobs,
                _maximum,
                _acc,
                _num_valid_tokens,
                tp_rank,
                tp_world_size,
                global_hidden,
            ) = _get_platform().forward_func(
                hidden, weight, labels, tp_group, reduction, ignore_index, sequence_parallel
            )
            # Tensors needed by backward go through save_for_backward;
            # non-tensor state rides directly on ctx.
            ctx.save_for_backward(global_hidden, weight, labels, _maximum, _acc, _num_valid_tokens)
            ctx.tp_group = tp_group
            ctx.ignore_index = ignore_index
            ctx.reduction = reduction
            ctx.tp_rank = tp_rank
            ctx.tp_world_size = tp_world_size
            ctx.sequence_parallel = sequence_parallel

        return logprobs

    @staticmethod
    def backward(
        ctx, dlogprobs: torch.Tensor
    ) -> typing.Tuple[torch.Tensor, torch.Tensor, None, None, None, None, None]:
        """
        The backward pass of the Linear Cross Entropy.
        Args:
            dlogprobs (torch.Tensor): The gradient of the cross entropy, with shape
                - either (num_tokens,) when reduction is "none"
                - or (1,) when reduction is "mean" or "sum"
        Returns:
            dhidden (torch.Tensor): The gradient of the hidden.
            dweight (torch.Tensor): The gradient of the weight.
        """
        with torch.cuda.nvtx.range("LinearCrossEntropy-backward"):
            # Restore the tensors/statistics stashed by forward.
            (global_hidden, weight, labels, _maximum, _accu, _num_valid_tokens) = ctx.saved_tensors

            tp_group = ctx.tp_group
            ignore_index = ctx.ignore_index
            reduction = ctx.reduction
            tp_rank = ctx.tp_rank
            tp_world_size = ctx.tp_world_size
            sequence_parallel = ctx.sequence_parallel

            d_hidden, d_weight = _get_platform().backward_func(
                dlogprobs,
                global_hidden,
                weight,
                labels,
                _maximum,
                _accu,
                _num_valid_tokens,
                reduction,
                ignore_index,
                tp_group,
                tp_rank,
                tp_world_size,
                sequence_parallel,
            )

        # One gradient slot per forward input; the non-tensor arguments
        # (tp_group, reduction, ignore_index, sequence_parallel) get None.
        return d_hidden, d_weight, None, None, None, None, None
+
+
def linear_cross_entropy(
    hidden: torch.Tensor,
    weight: torch.Tensor,
    labels: torch.Tensor,
    tp_group: typing.Optional[torch.distributed.ProcessGroup] = None,
    reduction: typing.Literal["none", "sum", "mean"] = "mean",
    ignore_index: int = -100,
    sequence_parallel: bool = False,
) -> torch.Tensor:
    """
    Functional convenience wrapper: forwards all arguments, in order, to
    ``LinearCrossEntropy.apply`` (see that class for full semantics).
    """
    return LinearCrossEntropy.apply(
        hidden, weight, labels, tp_group, reduction, ignore_index, sequence_parallel
    )


__all__ = ["linear_cross_entropy", "LinearCrossEntropy"]
diff --git a/megatron/core/fusions/fused_mhc_kernels.py b/megatron/core/fusions/fused_mhc_kernels.py
new file mode 100644
index 00000000000..6a19255196a
--- /dev/null
+++ b/megatron/core/fusions/fused_mhc_kernels.py
@@ -0,0 +1,964 @@
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+"""Fused cuTile kernels for mHC (Manifold-Constrained Hyper-Connections).
+
+Requires cuda.tile (cuTile) for optimal performance on supported GPUs
+(compute capability 10.x+). Reference (non-fused) implementations live in
+``megatron.core.transformer.hyper_connection`` and are used when cuTile is
+unavailable or when the ``use_fused_mhc`` config flag is False.
+
+Four fused operations:
+ - sinkhorn: Sinkhorn-Knopp projection to doubly stochastic matrix
+ - h_aggregate: weighted n-stream -> 1-stream aggregation
+ - h_post_bda: fused H_res @ residual + H_post * (x + bias)
+ - proj_rms: fused projection + RMS normalization
+"""
+
+import math
+from typing import Optional, Tuple
+
+import torch
+from torch import Tensor
+
+# ---------------------------------------------------------------------------
+# Check cuTile availability
+# ---------------------------------------------------------------------------
+_CUTILE_AVAILABLE = False
+try:
+ import cuda.tile as ct
+
+ _CUTILE_AVAILABLE = True
+except ImportError:
+ pass
+
+
def is_cutile_available() -> bool:
    """Return True if cuTile fused kernels are available."""
    # Reflects whether the module-level `import cuda.tile` succeeded.
    return _CUTILE_AVAILABLE
+
+
+# ============================================================================
+# CuTile implementations (only defined when cuda.tile is available)
+# ============================================================================
+
+if _CUTILE_AVAILABLE:
+ ConstInt = ct.Constant[int]
+ PAD_ZERO = ct.PaddingMode.ZERO
+ LOG2E = 1.4426950408889634
+
+ # -- Sinkhorn kernels ----------------------------------------------------
+
    @ct.kernel
    def _ct_sinkhorn_fwd_kernel(
        inp, out, M_init_out, eps, HC: ConstInt, NUM_ITERS: ConstInt, TILE_SIZE: ConstInt
    ):
        # One block processes TILE_SIZE independent (HC x HC) matrices.
        pid = ct.bid(0)
        logits = ct.load(inp, index=(pid, 0, 0), shape=(TILE_SIZE, HC, HC)).astype(ct.float32)
        # Row-max subtraction for a numerically stable exp.
        row_max = ct.max(logits, axis=2, keepdims=True)
        # exp(x) computed as exp2(x * log2(e)).
        M = ct.exp2((logits - row_max) * LOG2E)
        # Stash the pre-iteration matrix; backward replays the iterations from it.
        ct.store(
            M_init_out,
            index=(pid, 0, 0),
            tile=ct.reshape(M.astype(M_init_out.dtype), (TILE_SIZE, HC, HC)),
        )
        # Sinkhorn-Knopp: alternate row and column normalization.
        for _ in range(NUM_ITERS):
            row_sum = ct.sum(M, axis=2, keepdims=True)
            M = M / (row_sum + eps)
            col_sum = ct.sum(M, axis=1, keepdims=True)
            M = M / (col_sum + eps)
        ct.store(out, index=(pid, 0, 0), tile=ct.reshape(M.astype(out.dtype), (TILE_SIZE, HC, HC)))
+
    @ct.kernel
    def _ct_sinkhorn_bwd_kernel(
        grad_out,
        M_init,
        grad_inp,
        ws_M,
        ws_rs,
        ws_cs,
        eps,
        HC: ConstInt,
        NUM_ITERS: ConstInt,
        TILE_SIZE: ConstInt,
    ):
        # Phase 1 replays the forward iterations from M_init, saving every
        # intermediate matrix (ws_M: pre-row-norm and pre-col-norm per step)
        # plus the row/column sums (ws_rs / ws_cs). Phase 2 walks the steps
        # in reverse, back-propagating through each normalization.
        pid = ct.bid(0)
        # Per-block offsets into the flat workspaces: 2 matrices and one
        # row-sum/col-sum pair are saved per iteration.
        M_base = pid * (2 * NUM_ITERS)
        v_base = pid * NUM_ITERS

        # ---- Phase 1: recompute forward, stashing intermediates ----
        M = ct.load(M_init, index=(pid, 0, 0), shape=(TILE_SIZE, HC, HC)).astype(ct.float32)
        for t in range(NUM_ITERS):
            ct.store(ws_M, index=(M_base + 2 * t, 0, 0), tile=M)  # before row-norm
            row_sum = ct.sum(M, axis=2, keepdims=True)
            ct.store(ws_rs, index=(v_base + t, 0, 0), tile=row_sum)
            M = M / (row_sum + eps)
            ct.store(ws_M, index=(M_base + 2 * t + 1, 0, 0), tile=M)  # before col-norm
            col_sum = ct.sum(M, axis=1, keepdims=True)
            ct.store(ws_cs, index=(v_base + t, 0, 0), tile=col_sum)
            M = M / (col_sum + eps)

        # ---- Phase 2: reverse-mode through the iterations ----
        grad = ct.load(grad_out, index=(pid, 0, 0), shape=(TILE_SIZE, HC, HC)).astype(ct.float32)
        for t_rev in range(NUM_ITERS):
            t = NUM_ITERS - 1 - t_rev
            # Backprop through column normalization y = M / (col_sum + eps):
            # dM = (dy - sum(dy * y)) / (col_sum + eps); here M holds y.
            col_s = ct.load(ws_cs, index=(v_base + t, 0, 0), shape=(TILE_SIZE, 1, HC))
            grad = grad / (col_s + eps)
            col_corr = ct.sum(grad * M, axis=1, keepdims=True)
            grad = grad - col_corr
            # Backprop through row normalization, using the saved
            # pre-col-norm matrix (the row-norm output).
            M = ct.load(ws_M, index=(M_base + 2 * t + 1, 0, 0), shape=(TILE_SIZE, HC, HC))
            row_s = ct.load(ws_rs, index=(v_base + t, 0, 0), shape=(TILE_SIZE, HC, 1))
            grad = grad / (row_s + eps)
            row_corr = ct.sum(grad * M, axis=2, keepdims=True)
            grad = grad - row_corr
            # Load the input of this step — it is the col-norm output of the
            # previous step, consumed by the next reverse iteration.
            M = ct.load(ws_M, index=(M_base + 2 * t, 0, 0), shape=(TILE_SIZE, HC, HC))
        # After the loop M == M_init = exp(logits - max); chain rule through
        # the exp multiplies by M_init.
        grad = grad * M
        ct.store(grad_inp, index=(pid, 0, 0), tile=grad.astype(grad_inp.dtype))
+
    def _cutile_sinkhorn_fwd(
        input_logits: Tensor, num_iterations: int, eps: float = 1e-8
    ) -> Tuple[Tensor, Tensor]:
        # Launch the fused Sinkhorn forward kernel.
        # Returns (projected, M_init): the doubly-stochastic output and the
        # pre-iteration exp-normalized matrix, which backward replays from.
        original_shape = input_logits.shape
        hc = original_shape[-1]
        # Flatten all leading dims into one batch of (hc, hc) matrices.
        N_batch = input_logits.numel() // (hc * hc)
        # gcd guarantees TILE_SIZE evenly divides N_batch (capped at 128).
        TILE_SIZE = math.gcd(N_batch, 128)
        dev = input_logits.device
        out = torch.empty(N_batch, hc, hc, dtype=input_logits.dtype, device=dev)
        M_init = torch.empty(N_batch, hc, hc, dtype=input_logits.dtype, device=dev)
        ct.launch(
            torch.cuda.current_stream(),
            (math.ceil(N_batch / TILE_SIZE), 1, 1),
            _ct_sinkhorn_fwd_kernel,
            (input_logits.view(N_batch, hc, hc), out, M_init, eps, hc, num_iterations, TILE_SIZE),
        )
        return out.view(original_shape), M_init.view(original_shape)
+
    def _cutile_sinkhorn_bwd(
        grad_output: Tensor, M_init: Tensor, num_iterations: int, eps: float = 1e-8
    ) -> Tensor:
        # Launch the fused Sinkhorn backward kernel, allocating fp32
        # workspaces for the recomputed per-iteration intermediates:
        # ws_M holds two matrices per iteration (pre-row-norm, pre-col-norm),
        # ws_rs / ws_cs the corresponding row and column sums.
        original_shape = grad_output.shape
        hc = original_shape[-1]
        N_batch = grad_output.numel() // (hc * hc)
        # Must match the forward tiling so workspace offsets line up.
        TILE_SIZE = math.gcd(N_batch, 128)
        dev = grad_output.device
        ws_M = torch.empty(N_batch * 2 * num_iterations, hc, hc, dtype=torch.float32, device=dev)
        ws_rs = torch.empty(N_batch * num_iterations, hc, 1, dtype=torch.float32, device=dev)
        ws_cs = torch.empty(N_batch * num_iterations, 1, hc, dtype=torch.float32, device=dev)
        grad_input = torch.empty(N_batch, hc, hc, dtype=grad_output.dtype, device=dev)
        ct.launch(
            torch.cuda.current_stream(),
            (math.ceil(N_batch / TILE_SIZE), 1, 1),
            _ct_sinkhorn_bwd_kernel,
            (
                grad_output.view(N_batch, hc, hc),
                M_init.view(N_batch, hc, hc),
                grad_input,
                ws_M,
                ws_rs,
                ws_cs,
                eps,
                hc,
                num_iterations,
                TILE_SIZE,
            ),
        )
        return grad_input.view(original_shape)
+
+ # -- H_aggregate kernels -------------------------------------------------
+
    @ct.kernel
    def _ct_h_agg_fwd_kernel(x, h_pre, out, N: ConstInt, TILE_M: ConstInt, TILE_C: ConstInt):
        # Weighted aggregation over the stream axis:
        # out[m, c] = sum_n x[m, n, c] * h_pre[m, n], tiled over the channel dim.
        pid = ct.bid(0)
        num_tiles = ct.num_tiles(x, axis=2, shape=(TILE_M, N, TILE_C))
        h_tile = ct.load(h_pre, index=(pid, 0), shape=(TILE_M, N), padding_mode=PAD_ZERO)
        # Broadcast the per-stream weights across the channel axis.
        h_tile = ct.expand_dims(h_tile, axis=2)
        for j in range(num_tiles):
            x_tile = ct.load(x, index=(pid, 0, j), shape=(TILE_M, N, TILE_C), padding_mode=PAD_ZERO)
            # Reduce over the N streams (axis=1).
            acc = ct.sum(x_tile * h_tile, axis=1).astype(ct.float32)
            ct.store(out, index=(pid, j), tile=acc.astype(out.dtype))
+
    @ct.kernel
    def _ct_h_agg_bwd_kernel(go, x, h_pre, gx, gh, N: ConstInt, TILE_M: ConstInt, TILE_C: ConstInt):
        # Backward of the weighted aggregation:
        #   gx[m, n, c] = go[m, c] * h_pre[m, n]
        #   gh[m, n]    = sum_c go[m, c] * x[m, n, c]
        pid = ct.bid(0)
        num_c_tiles = ct.num_tiles(go, axis=1, shape=(TILE_M, TILE_C))
        h_tile = ct.load(h_pre, index=(pid, 0), shape=(TILE_M, N), padding_mode=PAD_ZERO)
        h_expanded = ct.expand_dims(h_tile, axis=2)
        # fp32 accumulator for the weight gradient across channel tiles.
        gh_acc = ct.full((TILE_M, N), 0, dtype=ct.float32)
        for ct_idx in range(num_c_tiles):
            go_tile = ct.load(
                go, index=(pid, ct_idx), shape=(TILE_M, TILE_C), padding_mode=PAD_ZERO
            )
            go_expanded = ct.expand_dims(go_tile, axis=1)
            x_tile = ct.load(
                x, index=(pid, 0, ct_idx), shape=(TILE_M, N, TILE_C), padding_mode=PAD_ZERO
            )
            gx_tile = go_expanded * h_expanded
            ct.store(gx, index=(pid, 0, ct_idx), tile=gx_tile.astype(gx.dtype))
            gh_acc += ct.sum(go_expanded * x_tile, axis=2)
        ct.store(gh, index=(pid, 0), tile=gh_acc.astype(gh.dtype))
+
    def _cutile_h_aggregate_fwd(x: Tensor, h_pre: Tensor) -> Tensor:
        # Fused n-stream -> 1-stream aggregation.
        # x: (s, b, n, C) streams, h_pre: (s, b, n) weights -> out: (s, b, C).
        s, b, n, C = x.shape
        sb = s * b
        # gcd keeps tile sizes exact divisors of the flattened dims.
        TILE_SIZE = math.gcd(sb, 4)
        TILE_C = math.gcd(C, 1024)
        out = torch.empty(sb, C, dtype=x.dtype, device=x.device)
        ct.launch(
            torch.cuda.current_stream(),
            (math.ceil(sb / TILE_SIZE),),
            _ct_h_agg_fwd_kernel,
            (x.view(sb, n, C), h_pre.view(sb, n), out, n, TILE_SIZE, TILE_C),
        )
        return out.view(s, b, C)
+
    def _cutile_h_aggregate_bwd(
        grad_output: Tensor, x: Tensor, h_pre: Tensor
    ) -> Tuple[Tensor, Tensor]:
        # Backward of _cutile_h_aggregate_fwd.
        # Returns (gx, gh) with the shapes of x and h_pre respectively.
        s, b, n, C = x.shape
        sb = s * b
        # Tiling mirrors the forward launch.
        TILE_C = math.gcd(C, 1024)
        TILE_M = math.gcd(sb, 4)
        gx = torch.empty(sb, n, C, dtype=x.dtype, device=x.device)
        gh = torch.empty(sb, n, dtype=x.dtype, device=x.device)
        ct.launch(
            torch.cuda.current_stream(),
            (math.ceil(sb / TILE_M),),
            _ct_h_agg_bwd_kernel,
            (
                grad_output.view(sb, C),
                x.view(sb, n, C),
                h_pre.view(sb, n),
                gx,
                gh,
                n,
                TILE_M,
                TILE_C,
            ),
        )
        return gx.view(s, b, n, C), gh.view(s, b, n)
+
+ # -- H_post BDA kernels --------------------------------------------------
+
    @ct.kernel
    def _ct_hpb_fwd_kernel(
        hr, orig, hp, x, out, N: ConstInt, TILE_C: ConstInt, TILE_SIZE: ConstInt
    ):
        # Fused residual mix (no bias):
        #   out[i, :] = hp[i] * x[:] + sum_j hr[i, j] * orig[j, :]
        # i.e. out = H_res @ residual + H_post * x, per token.
        # NOTE(review): the (TILE_SIZE, N, *) -> (N, *) reshapes imply
        # TILE_SIZE == 1 (the launcher passes gcd(sb, 1)) — confirm.
        pid = ct.bid(0)
        num_c_tiles = ct.num_tiles(x, axis=1, shape=(TILE_SIZE, TILE_C))
        hp_tile = ct.load(hp, index=(pid, 0), shape=(TILE_SIZE, N), padding_mode=PAD_ZERO)
        hp_2d = ct.reshape(hp_tile, (N, 1))
        hr_tile = ct.load(hr, index=(pid, 0, 0), shape=(TILE_SIZE, N, N), padding_mode=PAD_ZERO)
        hr_2d = ct.reshape(hr_tile, (N, N))
        for ct_idx in range(num_c_tiles):
            orig_tile = ct.load(
                orig, index=(pid, 0, ct_idx), shape=(TILE_SIZE, N, TILE_C), padding_mode=PAD_ZERO
            )
            orig_2d = ct.reshape(orig_tile, (N, TILE_C))
            x_tile = ct.load(
                x, index=(pid, ct_idx), shape=(TILE_SIZE, TILE_C), padding_mode=PAD_ZERO
            )
            x_2d = ct.reshape(x_tile, (1, TILE_C))
            # H_post * x, broadcast over channels.
            out_2d = hp_2d * x_2d
            # Accumulate column j of H_res times residual stream j.
            for j in range(N):
                out_2d += ct.extract(hr_2d, (0, j), shape=(N, 1)) * ct.extract(
                    orig_2d, (j, 0), shape=(1, TILE_C)
                )
            ct.store(
                out,
                index=(pid, 0, ct_idx),
                tile=ct.reshape(out_2d, (TILE_SIZE, N, TILE_C)).astype(out.dtype),
            )
+
    @ct.kernel
    def _ct_hpb_fwd_bias_kernel(
        hr, orig, hp, x, bias, out, N: ConstInt, TILE_C: ConstInt, TILE_SIZE: ConstInt
    ):
        # Bias variant of _ct_hpb_fwd_kernel:
        #   out[i, :] = hp[i] * (x[:] + bias[:]) + sum_j hr[i, j] * orig[j, :]
        pid = ct.bid(0)
        num_c_tiles = ct.num_tiles(x, axis=1, shape=(TILE_SIZE, TILE_C))
        hp_tile = ct.load(hp, index=(pid, 0), shape=(TILE_SIZE, N), padding_mode=PAD_ZERO)
        hp_2d = ct.reshape(hp_tile, (N, 1))
        hr_tile = ct.load(hr, index=(pid, 0, 0), shape=(TILE_SIZE, N, N), padding_mode=PAD_ZERO)
        hr_2d = ct.reshape(hr_tile, (N, N))
        for ct_idx in range(num_c_tiles):
            orig_tile = ct.load(
                orig, index=(pid, 0, ct_idx), shape=(TILE_SIZE, N, TILE_C), padding_mode=PAD_ZERO
            )
            orig_2d = ct.reshape(orig_tile, (N, TILE_C))
            x_tile = ct.load(
                x, index=(pid, ct_idx), shape=(TILE_SIZE, TILE_C), padding_mode=PAD_ZERO
            )
            # Bias is shared across tokens; indexed by channel tile only.
            bias_tile = ct.load(bias, index=(ct_idx,), shape=(TILE_C,), padding_mode=PAD_ZERO)
            xb_2d = ct.reshape(x_tile, (1, TILE_C)) + ct.reshape(bias_tile, (1, TILE_C))
            out_2d = hp_2d * xb_2d
            for j in range(N):
                out_2d += ct.extract(hr_2d, (0, j), shape=(N, 1)) * ct.extract(
                    orig_2d, (j, 0), shape=(1, TILE_C)
                )
            ct.store(
                out,
                index=(pid, 0, ct_idx),
                tile=ct.reshape(out_2d, (TILE_SIZE, N, TILE_C)).astype(out.dtype),
            )
+
    @ct.kernel
    def _ct_hpb_bwd_kernel(
        go,
        hr,
        orig,
        hp,
        x,
        g_hr,
        g_orig,
        g_hp,
        g_x,
        N: ConstInt,
        TILE_C: ConstInt,
        TILE_SIZE: ConstInt,
    ):
        # Backward of _ct_hpb_fwd_kernel:
        #   g_x    = sum_i hp[i] * go[i, :]
        #   g_orig = hr^T @ go
        #   g_hp[i]    = sum_c go[i, c] * x[c]
        #   g_hr[i, j] = sum_c go[i, c] * orig[j, c]
        pid = ct.bid(0)
        num_c_tiles = ct.cdiv(go.shape[2], TILE_C)
        hp_tile = ct.load(hp, index=(pid, 0), shape=(TILE_SIZE, N))
        hp_2d = ct.reshape(hp_tile, (1, N))
        hr_tile = ct.load(hr, index=(pid, 0, 0), shape=(TILE_SIZE, N, N), padding_mode=PAD_ZERO)
        hr_2d = ct.reshape(hr_tile, (N, N))
        # Matrix/vector gradients accumulate in fp32 across channel tiles.
        acc_g_hp_2d = ct.full((N, 1), 0, dtype=ct.float32)
        acc_g_hr_2d = ct.full((N, N), 0, dtype=ct.float32)
        for ct_idx in range(num_c_tiles):
            x_tile = ct.load(
                x, index=(pid, ct_idx), shape=(TILE_SIZE, TILE_C), padding_mode=PAD_ZERO
            )
            x_2d = ct.reshape(x_tile, (1, TILE_C))
            go_tile = ct.load(
                go, index=(pid, 0, ct_idx), shape=(TILE_SIZE, N, TILE_C), padding_mode=PAD_ZERO
            )
            go_2d = ct.reshape(go_tile, (N, TILE_C))
            orig_tile = ct.load(
                orig, index=(pid, 0, ct_idx), shape=(TILE_SIZE, N, TILE_C), padding_mode=PAD_ZERO
            )
            orig_2d = ct.reshape(orig_tile, (N, TILE_C))
            # NOTE(review): g_x/g_orig accumulate in hp.dtype (possibly low
            # precision), unlike the fp32 accumulators above — confirm this
            # precision choice is intentional.
            g_x_2d = ct.full((1, TILE_C), 0, dtype=hp.dtype)
            g_orig_2d = ct.full((N, TILE_C), 0, dtype=hp.dtype)
            for j in range(N):
                g_x_2d += ct.extract(hp_2d, (0, j), shape=(1, 1)).item() * ct.extract(
                    go_2d, (j, 0), shape=(1, TILE_C)
                )
                g_orig_2d += ct.extract(hr_2d, (j, 0), shape=(1, N)).reshape((N, 1)) * ct.extract(
                    go_2d, (j, 0), shape=(1, TILE_C)
                )
            acc_g_hp_2d += ct.sum(go_2d * x_2d, axis=1, keepdims=True)
            acc_g_hr_2d += ct.sum(
                ct.expand_dims(go_2d, axis=1) * ct.expand_dims(orig_2d, axis=0), axis=2
            )
            ct.store(
                g_x,
                index=(pid, ct_idx),
                tile=ct.reshape(g_x_2d, (TILE_SIZE, TILE_C)).astype(g_x.dtype),
            )
            ct.store(
                g_orig,
                index=(pid, 0, ct_idx),
                tile=ct.reshape(g_orig_2d, (TILE_SIZE, N, TILE_C)).astype(g_orig.dtype),
            )
        ct.store(
            g_hp, index=(pid, 0), tile=ct.reshape(acc_g_hp_2d, (TILE_SIZE, N)).astype(g_hp.dtype)
        )
        ct.store(
            g_hr,
            index=(pid, 0, 0),
            tile=ct.reshape(acc_g_hr_2d, (TILE_SIZE, N, N)).astype(g_hr.dtype),
        )
+
    @ct.kernel
    def _ct_hpb_bwd_bias_kernel(
        go,
        hr,
        orig,
        hp,
        x,
        bias,
        g_hr,
        g_orig,
        g_hp,
        g_x,
        N: ConstInt,
        TILE_C: ConstInt,
        TILE_SIZE: ConstInt,
    ):
        # Bias variant of _ct_hpb_bwd_kernel: identical gradients except that
        # g_hp is taken w.r.t. (x + bias) instead of x. The bias gradient
        # itself is derived on the host as g_x summed over tokens.
        pid = ct.bid(0)
        num_c_tiles = ct.cdiv(go.shape[2], TILE_C)
        hp_tile = ct.load(hp, index=(pid, 0), shape=(TILE_SIZE, N))
        hp_2d = ct.reshape(hp_tile, (1, N))
        hr_tile = ct.load(hr, index=(pid, 0, 0), shape=(TILE_SIZE, N, N), padding_mode=PAD_ZERO)
        hr_2d = ct.reshape(hr_tile, (N, N))
        acc_g_hp_2d = ct.full((N, 1), 0, dtype=ct.float32)
        acc_g_hr_2d = ct.full((N, N), 0, dtype=ct.float32)
        for ct_idx in range(num_c_tiles):
            x_tile = ct.load(
                x, index=(pid, ct_idx), shape=(TILE_SIZE, TILE_C), padding_mode=PAD_ZERO
            )
            bias_tile = ct.load(bias, index=(ct_idx,), shape=(TILE_C,), padding_mode=PAD_ZERO)
            # Recompute x + bias; it is not saved by the forward pass.
            xb_2d = ct.reshape(x_tile, (1, TILE_C)) + ct.reshape(bias_tile, (1, TILE_C))
            go_tile = ct.load(
                go, index=(pid, 0, ct_idx), shape=(TILE_SIZE, N, TILE_C), padding_mode=PAD_ZERO
            )
            go_2d = ct.reshape(go_tile, (N, TILE_C))
            orig_tile = ct.load(
                orig, index=(pid, 0, ct_idx), shape=(TILE_SIZE, N, TILE_C), padding_mode=PAD_ZERO
            )
            orig_2d = ct.reshape(orig_tile, (N, TILE_C))
            g_x_2d = ct.full((1, TILE_C), 0, dtype=hp.dtype)
            g_orig_2d = ct.full((N, TILE_C), 0, dtype=hp.dtype)
            for j in range(N):
                g_x_2d += ct.extract(hp_2d, (0, j), shape=(1, 1)).item() * ct.extract(
                    go_2d, (j, 0), shape=(1, TILE_C)
                )
                g_orig_2d += ct.extract(hr_2d, (j, 0), shape=(1, N)).reshape((N, 1)) * ct.extract(
                    go_2d, (j, 0), shape=(1, TILE_C)
                )
            # g_hp uses the biased activation (x + bias).
            acc_g_hp_2d += ct.sum(go_2d * xb_2d, axis=1, keepdims=True)
            acc_g_hr_2d += ct.sum(
                ct.expand_dims(go_2d, axis=1) * ct.expand_dims(orig_2d, axis=0), axis=2
            )
            ct.store(
                g_x,
                index=(pid, ct_idx),
                tile=ct.reshape(g_x_2d, (TILE_SIZE, TILE_C)).astype(g_x.dtype),
            )
            ct.store(
                g_orig,
                index=(pid, 0, ct_idx),
                tile=ct.reshape(g_orig_2d, (TILE_SIZE, N, TILE_C)).astype(g_orig.dtype),
            )
        ct.store(
            g_hp, index=(pid, 0), tile=ct.reshape(acc_g_hp_2d, (TILE_SIZE, N)).astype(g_hp.dtype)
        )
        ct.store(
            g_hr,
            index=(pid, 0, 0),
            tile=ct.reshape(acc_g_hr_2d, (TILE_SIZE, N, N)).astype(g_hr.dtype),
        )
+
    def _cutile_h_post_bda_fwd(
        h_res: Tensor, original_residual: Tensor, h_post: Tensor, x: Tensor, bias: Optional[Tensor]
    ) -> Tensor:
        # Fused H_res @ residual + H_post * (x + bias), per token.
        # original_residual: (s, b, n, C), h_res: per-token (n, n) mixing
        # matrices, h_post: per-token (n,) gates, x: (s, b, C).
        s, b, n, C = original_residual.shape
        sb = s * b
        TILE_C = math.gcd(C, 1024)
        # gcd(sb, 1) is always 1 — one token per block (the kernels rely on
        # TILE_SIZE == 1 for their internal reshapes).
        TILE_SIZE = math.gcd(sb, 1)
        out = torch.empty(sb, n, C, dtype=h_res.dtype, device=h_res.device)
        grid = (math.ceil(sb / TILE_SIZE),)
        # Separate kernels avoid a per-tile bias branch on the device.
        if bias is not None:
            ct.launch(
                torch.cuda.current_stream(),
                grid,
                _ct_hpb_fwd_bias_kernel,
                (
                    h_res.view(sb, n, n),
                    original_residual.view(sb, n, C),
                    h_post.view(sb, n),
                    x.view(sb, C),
                    bias,
                    out,
                    n,
                    TILE_C,
                    TILE_SIZE,
                ),
            )
        else:
            ct.launch(
                torch.cuda.current_stream(),
                grid,
                _ct_hpb_fwd_kernel,
                (
                    h_res.view(sb, n, n),
                    original_residual.view(sb, n, C),
                    h_post.view(sb, n),
                    x.view(sb, C),
                    out,
                    n,
                    TILE_C,
                    TILE_SIZE,
                ),
            )
        return out.view(s, b, n, C)
+
    def _cutile_h_post_bda_bwd(
        grad_output: Tensor,
        h_res: Tensor,
        original_residual: Tensor,
        h_post: Tensor,
        x: Tensor,
        bias: Optional[Tensor],
    ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Optional[Tensor]]:
        # Backward of _cutile_h_post_bda_fwd. Returns gradients for
        # (h_res, original_residual, h_post, x, bias).
        s, b, n, C = original_residual.shape
        sb = s * b
        TILE_C = math.gcd(C, 1024)
        # Always 1 — matches the forward launch; grid below is (sb,).
        TILE_SIZE = math.gcd(sb, 1)
        g_hr = torch.empty(sb, n, n, dtype=h_res.dtype, device=h_res.device)
        g_res = torch.empty(sb, n, C, dtype=h_res.dtype, device=h_res.device)
        g_hp = torch.empty(sb, n, dtype=h_res.dtype, device=h_res.device)
        g_x = torch.empty(sb, C, dtype=h_res.dtype, device=h_res.device)
        grid = (sb,)
        if bias is not None:
            ct.launch(
                torch.cuda.current_stream(),
                grid,
                _ct_hpb_bwd_bias_kernel,
                (
                    grad_output.view(sb, n, C),
                    h_res.view(sb, n, n),
                    original_residual.view(sb, n, C),
                    h_post.view(sb, n),
                    x.view(sb, C),
                    bias,
                    g_hr,
                    g_res,
                    g_hp,
                    g_x,
                    n,
                    TILE_C,
                    TILE_SIZE,
                ),
            )
        else:
            ct.launch(
                torch.cuda.current_stream(),
                grid,
                _ct_hpb_bwd_kernel,
                (
                    grad_output.view(sb, n, C),
                    h_res.view(sb, n, n),
                    original_residual.view(sb, n, C),
                    h_post.view(sb, n),
                    x.view(sb, C),
                    g_hr,
                    g_res,
                    g_hp,
                    g_x,
                    n,
                    TILE_C,
                    TILE_SIZE,
                ),
            )
        # Bias enters identically to x, so its gradient is g_x summed over
        # all tokens.
        g_bias = g_x.sum(dim=0) if bias is not None else None
        return (
            g_hr.view(s, b, n, n),
            g_res.view(s, b, n, C),
            g_hp.view(s, b, n),
            g_x.view(s, b, C),
            g_bias,
        )
+
+ # -- Proj RMS kernels ----------------------------------------------------
+
    @ct.function
    def _ct_rms_dnorm(a_tile, norm_tile, dr_tile, K):
        # Gradient of r = 1 / (norm / sqrt(K) + eps) w.r.t. the input row a:
        #   dr/dnorm = -(1/u^2) / sqrt(K), with u = norm/sqrt(K) + eps,
        #   dnorm/da = a / norm (guarded to 0 when norm == 0).
        # NOTE(review): eps is hard-coded to 1e-8 here while the forward
        # kernel receives eps as a parameter — confirm they stay in sync.
        inv_norm = ct.where(norm_tile > 0, 1.0 / norm_tile, 0.0)
        inv_sqrt_k = 1.0 / ct.sqrt(K)
        eps = 1e-8
        u = norm_tile * inv_sqrt_k + eps
        coeff = -(1.0 / (u * u)) * inv_sqrt_k
        return dr_tile * coeff * a_tile * inv_norm
+
    @ct.kernel
    def _ct_proj_rms_fwd_kernel(
        A,
        B,
        PROJ,
        NORM,
        R,
        M: int,
        N: int,
        K: int,
        eps: float,
        TILE_M: ConstInt,
        TILE_N: ConstInt,
        TILE_K: ConstInt,
    ):
        # Fused projection + RMS statistics over one M-tile:
        #   PROJ = A @ B^T  (B stored as (N, K))
        #   NORM = sqrt(sum_k A^2) per row
        #   R    = 1 / (NORM / sqrt(K) + eps)  — reciprocal RMS scale.
        # N is covered by a single TILE_N tile (launcher sizes TILE_N to the
        # next power of two >= N); the grid only spans M.
        tile_m_id = ct.bid(0)
        num_k_tiles = ct.cdiv(K, TILE_K)
        acc = ct.full((TILE_M, TILE_N), 0.0, dtype=ct.float32)
        sum_sq = ct.full((TILE_M, 1), 0.0, dtype=ct.float32)
        for tile_k_id in range(num_k_tiles):
            a_tile = ct.load(
                A, index=(tile_m_id, tile_k_id), shape=(TILE_M, TILE_K), padding_mode=PAD_ZERO
            )
            b_tile = ct.load(B, index=(0, tile_k_id), shape=(TILE_N, TILE_K), padding_mode=PAD_ZERO)
            # tf32 matmul accumulated in fp32.
            acc = ct.mma(
                a_tile.astype(ct.tfloat32), b_tile.transpose().astype(ct.tfloat32), acc=acc
            )
            sum_sq += ct.sum(a_tile * a_tile, axis=1, keepdims=True)
        norm_tile = ct.sqrt(sum_sq)
        v = norm_tile / ct.sqrt(K) + eps
        r_tile = 1.0 / v
        ct.store(PROJ, index=(tile_m_id, 0), tile=acc.astype(PROJ.dtype))
        ct.store(NORM, index=(tile_m_id, 0), tile=norm_tile.astype(NORM.dtype))
        ct.store(R, index=(tile_m_id, 0), tile=r_tile.astype(R.dtype))
+
    @ct.kernel
    def _ct_proj_rms_bwd_kernel(
        A,
        B,
        NORM,
        DD,
        DR,
        DA,
        DB,
        M: int,
        N: int,
        K: int,
        TILE_SIZE_M: ConstInt,
        TILE_SIZE_N: ConstInt,
        TILE_SIZE_K: ConstInt,
    ):
        # Backward of the fused projection + RMS (large-K path):
        #   DA = DD @ B + d(r)/d(A)   (RMS term via _ct_rms_dnorm)
        #   DB = (A^T @ DD)^T = DD^T @ A
        # One block owns one K-tile and sweeps all M-tiles, accumulating DB.
        zero_pad = ct.PaddingMode.ZERO
        tile_k_id = ct.bid(0)
        NUM_M_TILES = ct.cdiv(M, TILE_SIZE_M)
        accumulator_db = ct.full((TILE_SIZE_K, TILE_SIZE_N), 0.0, dtype=ct.float32)
        for tile_m_id in range(NUM_M_TILES):
            accumulator_da = ct.full((TILE_SIZE_M, TILE_SIZE_K), 0.0, dtype=ct.float32)
            a_tile = ct.load(
                A,
                index=(tile_m_id, tile_k_id),
                shape=(TILE_SIZE_M, TILE_SIZE_K),
                padding_mode=zero_pad,
            )
            norm_tile = ct.load(
                NORM, index=(tile_m_id, 0), shape=(TILE_SIZE_M, 1), padding_mode=zero_pad
            )
            dr_tile = ct.load(
                DR, index=(tile_m_id, 0), shape=(TILE_SIZE_M, 1), padding_mode=zero_pad
            )
            # RMS-scale gradient contribution to DA.
            accumulator_da = accumulator_da + _ct_rms_dnorm(a_tile, norm_tile, dr_tile, K)
            b_tile = ct.load(
                B, index=(0, tile_k_id), shape=(TILE_SIZE_N, TILE_SIZE_K), padding_mode=zero_pad
            )
            dd_tile = ct.load(
                DD, index=(tile_m_id, 0), shape=(TILE_SIZE_M, TILE_SIZE_N), padding_mode=zero_pad
            )
            dd_tile = ct.astype(dd_tile, ct.tfloat32)
            # Projection gradient: DA += DD @ B.
            accumulator_da = ct.mma(dd_tile, b_tile.astype(ct.tfloat32), acc=accumulator_da)
            ct.store(DA, index=(tile_m_id, tile_k_id), tile=accumulator_da.astype(DA.dtype))
            # Weight gradient accumulated across M-tiles: A^T @ DD.
            accumulator_db = ct.mma(
                a_tile.transpose().astype(ct.tfloat32), dd_tile, acc=accumulator_db
            )
        ct.store(DB, index=(0, tile_k_id), tile=accumulator_db.transpose().astype(DB.dtype))
+
    @ct.kernel
    def _ct_proj_rms_bwd_small_k_kernel(
        A, B, NORM, DD, DR, DA, DB, M: int, N: int, K: int, TILE_N_SIZE: ConstInt
    ):
        # Small-K backward path: the grid's second axis splits the work —
        # bid(1) == 0 blocks compute DB, bid(1) == 1 blocks compute DA —
        # and each side strides its tile ids across the available blocks.
        zero_pad = ct.PaddingMode.ZERO
        TILE_DB_SIZE_M = 128
        TILE_DB_SIZE_K = 64
        NUM_M_TILES = ct.cdiv(M, TILE_DB_SIZE_M)
        NUM_K_TILES = ct.cdiv(K, TILE_DB_SIZE_K)
        if ct.bid(1) == 0:
            # DB = (A^T @ DD)^T, accumulated over all M-tiles per K-tile.
            for tile_id in range(ct.bid(0), NUM_K_TILES, ct.num_blocks(0)):
                accumulator_db = ct.full((TILE_DB_SIZE_K, TILE_N_SIZE), 0.0, dtype=ct.float32)
                for m_tile in range(NUM_M_TILES):
                    a_tile = ct.load(
                        A,
                        index=(m_tile, tile_id),
                        shape=(TILE_DB_SIZE_M, TILE_DB_SIZE_K),
                        padding_mode=zero_pad,
                    )
                    dd_tile = ct.load(
                        DD,
                        index=(m_tile, 0),
                        shape=(TILE_DB_SIZE_M, TILE_N_SIZE),
                        padding_mode=zero_pad,
                    )
                    accumulator_db = ct.mma(
                        a_tile.transpose().astype(ct.tfloat32),
                        dd_tile.astype(ct.tfloat32),
                        acc=accumulator_db,
                    )
                ct.store(
                    DB,
                    index=(0, tile_id),
                    tile=accumulator_db.transpose().astype(DB.dtype),
                    allow_tma=False,
                )
        TILE_DA_SIZE_M = 128
        TILE_DA_SIZE_K = 256
        NUM_DA_TILES = ct.cdiv(M, TILE_DA_SIZE_M) * ct.cdiv(K, TILE_DA_SIZE_K)
        NUM_DA_K_TILES = ct.cdiv(K, TILE_DA_SIZE_K)
        if ct.bid(1) == 1:
            # DA = DD @ B + RMS-scale term, one (M, K) tile per iteration.
            for tile_id in range(ct.bid(0), NUM_DA_TILES, ct.num_blocks(0)):
                # Decompose the flat tile id into (m, k) tile coordinates.
                b_tile_idx = tile_id % NUM_DA_K_TILES
                dd_tile_idx = tile_id // NUM_DA_K_TILES
                accumulator_da = ct.full((TILE_DA_SIZE_M, TILE_DA_SIZE_K), 0.0, dtype=ct.float32)
                a_tile = ct.load(
                    A,
                    index=(dd_tile_idx, b_tile_idx),
                    shape=(TILE_DA_SIZE_M, TILE_DA_SIZE_K),
                    padding_mode=zero_pad,
                )
                norm_tile = ct.load(
                    NORM, index=(dd_tile_idx, 0), shape=(TILE_DA_SIZE_M, 1), padding_mode=zero_pad
                )
                dr_tile = ct.load(
                    DR, index=(dd_tile_idx, 0), shape=(TILE_DA_SIZE_M, 1), padding_mode=zero_pad
                )
                accumulator_da = accumulator_da + _ct_rms_dnorm(
                    a_tile.astype(ct.float32), norm_tile, dr_tile, K
                )
                b_tile = ct.load(
                    B,
                    index=(0, b_tile_idx),
                    shape=(TILE_N_SIZE, TILE_DA_SIZE_K),
                    padding_mode=zero_pad,
                )
                dd_tile = ct.load(
                    DD,
                    index=(dd_tile_idx, 0),
                    shape=(TILE_DA_SIZE_M, TILE_N_SIZE),
                    padding_mode=zero_pad,
                )
                accumulator_da = ct.mma(
                    dd_tile.astype(ct.tfloat32), b_tile.astype(ct.tfloat32), acc=accumulator_da
                )
                ct.store(DA, index=(dd_tile_idx, b_tile_idx), tile=accumulator_da.astype(DA.dtype))
+
+ def _next_power_of_2(n: int) -> int:
+ n -= 1
+ n |= n >> 1
+ n |= n >> 2
+ n |= n >> 4
+ n |= n >> 8
+ n |= n >> 16
+ n |= n >> 32
+ n += 1
+ return n
+
    def _cutile_proj_rms_fwd(
        x: Tensor, weight: Tensor, eps: float = 1e-8
    ) -> Tuple[Tensor, Tensor, Tensor]:
        # Fused projection + RMS statistics.
        # x: (M, K), weight: (N, K). Returns:
        #   proj: (M, N) = x @ weight^T
        #   norm: (M, 1) per-row L2 norm of x
        #   r:    (M, 1) = 1 / (norm / sqrt(K) + eps)
        M, K = x.shape
        N = weight.shape[0]
        TILE_M = 128
        # One N-tile covers the whole output row (kernel assumes this).
        TILE_N = _next_power_of_2(N)
        TILE_K = 128
        num_tiles_m = math.ceil(M / TILE_M)
        proj = torch.empty(M, N, dtype=x.dtype, device=x.device)
        norm = torch.empty(M, 1, dtype=x.dtype, device=x.device)
        r = torch.empty(M, 1, dtype=x.dtype, device=x.device)
        ct.launch(
            torch.cuda.current_stream(),
            (num_tiles_m,),
            _ct_proj_rms_fwd_kernel,
            (x, weight, proj, norm, r, M, N, K, eps, TILE_M, TILE_N, TILE_K),
        )
        return proj, norm, r
+
    def _cutile_proj_rms_bwd(
        grad_proj: Tensor,
        grad_r: Tensor,
        x: Tensor,
        weight: Tensor,
        norm: Tensor,
        eps: float = 1e-8,
    ) -> Tuple[Tensor, Tensor]:
        # Backward of _cutile_proj_rms_fwd: returns (dx, dweight).
        # Two launch strategies: a K-tiled kernel for large K, and a
        # persistent-block kernel (grid sized by SM count) for small K.
        # NOTE(review): eps is accepted but not forwarded to the kernels —
        # the device-side _ct_rms_dnorm uses a fixed 1e-8; confirm.
        M, K = x.shape
        N = weight.shape[0]
        da = torch.empty_like(x)
        db = torch.empty_like(weight)
        TILE_SIZE_N = _next_power_of_2(N)
        assert TILE_SIZE_N <= 256, f"TILE_SIZE_N too large: {TILE_SIZE_N}"
        num_sms = torch.cuda.get_device_properties("cuda").multi_processor_count
        if K >= 8192:
            TILE_SIZE_M, TILE_SIZE_K = 128, 128
            grid = (math.ceil(K / TILE_SIZE_K), 1)
            ct.launch(
                torch.cuda.current_stream(),
                grid,
                _ct_proj_rms_bwd_kernel,
                (
                    x,
                    weight,
                    norm,
                    grad_proj,
                    grad_r,
                    da,
                    db,
                    M,
                    N,
                    K,
                    TILE_SIZE_M,
                    TILE_SIZE_N,
                    TILE_SIZE_K,
                ),
            )
        else:
            # Second grid axis selects the DB (0) vs DA (1) work partition.
            grid = (num_sms, 2, 1)
            ct.launch(
                torch.cuda.current_stream(),
                grid,
                _ct_proj_rms_bwd_small_k_kernel,
                (x, weight, norm, grad_proj, grad_r, da, db, M, N, K, TILE_SIZE_N),
            )
        return da, db
+
+
+# ============================================================================
+# Autograd Functions (cuTile only — guarded by _CUTILE_AVAILABLE)
+# ============================================================================
+
if not _CUTILE_AVAILABLE:

    def _no_cutile_error(*_args, **_kwargs):
        """Stub bound to every public entry point when cuTile is missing.

        Accepts any signature and always raises, so callers get a clear
        actionable error instead of an AttributeError/NameError.
        """
        raise RuntimeError(
            "Fused mHC kernels require cuda.tile (cuTile) which is not installed. "
            "Either install cuTile or set use_fused_mhc=False to use reference "
            "implementations."
        )

    # Bind all public entry points to the fail-fast stub.
    fused_sinkhorn = _no_cutile_error
    fused_h_aggregate = _no_cutile_error
    fused_h_post_bda = _no_cutile_error
    fused_proj_rms = _no_cutile_error
+
+else:
+
class FusedSinkhornKnopp(torch.autograd.Function):
    """Fused Sinkhorn-Knopp projection to doubly stochastic matrix (cuTile)."""

    @staticmethod
    def forward(ctx, input_logits: Tensor, num_iterations: int, eps: float = 1e-6):
        """cuTile fused Sinkhorn forward.

        Only ``M_init`` (initial matrix state returned by the forward
        helper) is saved for backward; the non-tensor iteration count and
        eps are stashed directly on ``ctx``.
        """
        output, M_init = _cutile_sinkhorn_fwd(input_logits, num_iterations, eps)
        ctx.save_for_backward(M_init)
        ctx.num_iterations = num_iterations
        ctx.eps = eps
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """cuTile fused Sinkhorn backward.

        Returns None for the non-tensor inputs (num_iterations, eps).
        """
        (M_init,) = ctx.saved_tensors
        grad_input = _cutile_sinkhorn_bwd(grad_output, M_init, ctx.num_iterations, ctx.eps)
        return grad_input, None, None
+
class FusedHAggregate(torch.autograd.Function):
    """Fused n-stream weighted aggregation (cuTile)."""

    @staticmethod
    def forward(ctx, x: Tensor, h_pre: Tensor):
        """cuTile fused h_aggregate forward. Saves both inputs for backward."""
        output = _cutile_h_aggregate_fwd(x, h_pre)
        ctx.save_for_backward(x, h_pre)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """cuTile fused h_aggregate backward.

        The helper must return a (grad_x, grad_h_pre) pair matching the
        forward's two tensor inputs.
        """
        x, h_pre = ctx.saved_tensors
        return _cutile_h_aggregate_bwd(grad_output, x, h_pre)
+
class FusedHPostBDA(torch.autograd.Function):
    """Fused: output = H_res @ orig_res + H_post * (x [+ bias]) (cuTile)."""

    @staticmethod
    def forward(
        ctx,
        h_res: Tensor,
        original_residual: Tensor,
        h_post: Tensor,
        x: Tensor,
        bias: Optional[Tensor],
    ):
        """cuTile fused h_post_bda forward.

        ``save_for_backward`` only accepts tensors, so a None bias is
        tracked via the ``ctx.has_bias`` flag instead of being saved.
        """
        output = _cutile_h_post_bda_fwd(h_res, original_residual, h_post, x, bias)
        if bias is not None:
            ctx.save_for_backward(h_res, original_residual, h_post, x, bias)
            ctx.has_bias = True
        else:
            ctx.save_for_backward(h_res, original_residual, h_post, x)
            ctx.has_bias = False
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """cuTile fused h_post_bda backward.

        The helper must return one gradient per forward input
        (h_res, original_residual, h_post, x, bias).
        """
        if ctx.has_bias:
            h_res, orig_res, h_post, x, bias = ctx.saved_tensors
        else:
            h_res, orig_res, h_post, x = ctx.saved_tensors
            bias = None
        return _cutile_h_post_bda_bwd(grad_output, h_res, orig_res, h_post, x, bias)
+
class FusedProjRms(torch.autograd.Function):
    """Fused projection + RMS normalization (cuTile)."""

    @staticmethod
    def forward(ctx, x: Tensor, weight: Tensor, eps: float = 1e-6):
        """cuTile fused proj_rms forward.

        The intermediate ``norm`` is saved for backward; only ``proj`` and
        the reciprocal norm ``r`` are returned to the caller.
        """
        proj, norm, r = _cutile_proj_rms_fwd(x, weight, eps)
        ctx.save_for_backward(x, weight, norm)
        ctx.eps = eps
        return proj, r

    @staticmethod
    def backward(ctx, grad_proj, grad_r):
        """cuTile fused proj_rms backward.

        Returns gradients for (x, weight) and None for the non-tensor eps.
        """
        x, weight, norm = ctx.saved_tensors
        grad_x, grad_weight = _cutile_proj_rms_bwd(grad_proj, grad_r, x, weight, norm, ctx.eps)
        return grad_x, grad_weight, None
+
+ # ========================================================================
+ # Public API (only available when cuTile is installed)
+ # ========================================================================
+
+ def fused_sinkhorn(input_logits: Tensor, num_iterations: int, eps: float = 1e-6) -> Tensor:
+ """Project logits to doubly stochastic matrix via Sinkhorn-Knopp.
+
+ Args:
+ input_logits: [..., n, n] raw logits
+ num_iterations: Sinkhorn iterations
+ eps: numerical stability
+
+ Returns:
+ [..., n, n] doubly stochastic matrix
+ """
+ return FusedSinkhornKnopp.apply(input_logits, num_iterations, eps)
+
+ def fused_h_aggregate(x: Tensor, h_pre: Tensor) -> Tensor:
+ """Weighted n-stream to 1-stream aggregation.
+
+ Args:
+ x: [s, b, n, C] n-stream hidden states
+ h_pre: [s, b, n] aggregation weights
+
+ Returns:
+ [s, b, C] aggregated hidden states
+ """
+ return FusedHAggregate.apply(x, h_pre)
+
+ def fused_h_post_bda(
+ h_res: Tensor, original_residual: Tensor, h_post: Tensor, x: Tensor, bias: Optional[Tensor]
+ ) -> Tensor:
+ """Fused H_res @ residual + H_post * (x + bias).
+
+ Args:
+ h_res: [s, b, n, n] residual mixing matrix
+ original_residual: [s, b, n, C] n-stream residual
+ h_post: [s, b, n] expansion weights
+ x: [s, b, C] layer output
+ bias: [C] or None
+
+ Returns:
+ [s, b, n, C] fused output
+ """
+ return FusedHPostBDA.apply(h_res, original_residual, h_post, x, bias)
+
+ def fused_proj_rms(x: Tensor, weight: Tensor, eps: float = 1e-6) -> Tuple[Tensor, Tensor]:
+ """Fused projection + RMS normalization.
+
+ Args:
+ x: [M, K] input
+ weight: [N, K] projection weight
+ eps: stability epsilon
+
+ Returns:
+ proj: [M, N] = x @ weight^T
+ r: [M, 1] = 1 / (||x|| / sqrt(K) + eps)
+ """
+ return FusedProjRms.apply(x, weight, eps)
diff --git a/megatron/core/fusions/linear_cross_entropy/__init__.py b/megatron/core/fusions/linear_cross_entropy/__init__.py
new file mode 100644
index 00000000000..b9a9591fa69
--- /dev/null
+++ b/megatron/core/fusions/linear_cross_entropy/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
diff --git a/megatron/core/fusions/linear_cross_entropy/blackwell/__init__.py b/megatron/core/fusions/linear_cross_entropy/blackwell/__init__.py
new file mode 100644
index 00000000000..b9a9591fa69
--- /dev/null
+++ b/megatron/core/fusions/linear_cross_entropy/blackwell/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
diff --git a/megatron/core/fusions/linear_cross_entropy/blackwell/bwd_partial_dlogits.py b/megatron/core/fusions/linear_cross_entropy/blackwell/bwd_partial_dlogits.py
new file mode 100644
index 00000000000..3178e8c6909
--- /dev/null
+++ b/megatron/core/fusions/linear_cross_entropy/blackwell/bwd_partial_dlogits.py
@@ -0,0 +1,667 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+
+import logging
+from typing import Optional, Tuple, Type
+
+try:
+ import cuda.bindings.driver as cuda # type: ignore
+ import cutlass
+ import cutlass.cute as cute
+ import cutlass.pipeline as pipeline # type: ignore
+ import cutlass.utils as utils # type: ignore
+ import cutlass.utils.blackwell_helpers as sm100_utils # type: ignore
+ from cutlass.cute.nvgpu import cpasync, tcgen05
+
+ SM100_TMEM_CAPACITY_COLUMNS: int = 512
+
def make_thread_cooperative_group(size: int, alignment: Optional[int] = None):
    """
    Create a thread cooperative group.

    Args:
        size: number of participating threads.
        alignment: barrier alignment; defaults to ``size`` when omitted.

    Returns:
        A ``pipeline.CooperativeGroup`` of thread agents.
    """
    return pipeline.CooperativeGroup(
        pipeline.Agent.Thread, size, alignment=alignment if alignment is not None else size
    )
+
+ class BwdPartialDlogits:
+ """
+ This class implements the backward kernel for partial d_logits.
+ """
+
    def __init__(
        self,
        reduction: int,
        acc_dtype: Type[cutlass.Numeric] = cutlass.Float32,
        use_2cta_instrs: bool = False,
        mma_tiler_mn: Tuple[int, int] = (128, 256),
        vocab_per_split: int = 512,
    ):
        """Configure the backward partial-d_logits kernel.

        Args:
            reduction: reduction mode consumed by the epilogue
                (2 = mean, 1 = sum, otherwise per-token / none).
            acc_dtype: accumulator numeric type for the MMA.
            use_2cta_instrs: use 2-CTA tcgen05 instructions (cluster (2, 1)).
            mma_tiler_mn: (M, N) tile shape of the MMA; K is derived later
                in ``_setup_attributes``.
            vocab_per_split: vocabulary slice handled per kernel launch.
        """
        # Compile-time constant so the epilogue's reduction branches fold away.
        self.REDUCTION: cutlass.Constexpr[cutlass.Int32] = cutlass.const_expr(reduction)
        self.acc_dtype = acc_dtype
        self.use_2cta_instrs = use_2cta_instrs
        # K extent placeholder (1); fixed up in _setup_attributes.
        self.mma_tiler = (*mma_tiler_mn, 1)
        self.vocab_per_split = vocab_per_split

        self.cta_group = tcgen05.CtaGroup.TWO if self.use_2cta_instrs else tcgen05.CtaGroup.ONE
        self.cluster_shape_mn = (2, 1) if self.use_2cta_instrs else (1, 1)

        self.smem_capacity = utils.get_smem_capacity_in_bytes("sm_100")

        self.threads_per_warp: int = 32

        # Warp specialization: 4 epilogue warps, 1 TMA-load warp, 1 MMA warp,
        # 2 "empty" warps (one of which handles TMEM alloc/dealloc).
        self.epi_warp_ids = (0, 1, 2, 3)
        self.load_warp_ids = 4
        self.mma_warp_ids = 5
        self.empty_warp_ids = (6, 7)

        self.threads_per_cta: int = self.threads_per_warp * len(
            (*self.epi_warp_ids, self.load_warp_ids, self.mma_warp_ids, *self.empty_warp_ids)
        )
        # CTA-wide named barrier used around TMEM allocation/deallocation.
        self.cta_sync_barrier = pipeline.NamedBarrier(
            barrier_id=1, num_threads=self.threads_per_cta
        )

        self.buffer_align_bytes: int = 1024
        # Per-warpgroup register budgets after dealloc (other) / alloc (epi).
        self.num_regs_other: int = 32
        self.num_regs_epi: int = 192
+
    def _compute_grid(
        self,
        problem_mnk: Tuple[int, int, int],
        cluster_shape_mn: Tuple[int, int],
        cta_tiler: Tuple[int, int, int],
    ) -> Tuple[int, int, int]:
        """Grid for one vocab split, rounded up to the cluster shape.

        Note: the N extent is taken from ``self.vocab_per_split`` rather
        than ``problem_mnk[1]`` — each launch covers a single split.
        """
        cluster_shape_mnk = (*cluster_shape_mn, 1)

        grid = cute.round_up(
            (
                cute.ceil_div(problem_mnk[0], cta_tiler[0]),
                cute.ceil_div(self.vocab_per_split, cta_tiler[1]),
                1,
            ),
            cluster_shape_mnk,
        )
        return grid
+
    def _compute_stages(
        self,
        tiled_mma: cute.TiledMma,
        mma_tiler: Tuple[int, int, int],
        a_dtype: Type[cutlass.Numeric],
        b_dtype: Type[cutlass.Numeric],
    ):
        """Pipeline stage counts: (acc stages, A/B smem stages, epilogue
        sub-tiles per tile).

        Currently fixed constants; the parameters are kept so a
        capacity-based heuristic can be added without changing callers.
        """
        num_acc_stage = 1
        num_ab_stage = 4
        num_epi_stage_per_tile = 4
        return num_acc_stage, num_ab_stage, num_epi_stage_per_tile
+
    def _setup_attributes(
        self,
        tiled_mma: cute.TiledMma,
        a_dtype: Type[cutlass.Numeric],
        b_dtype: Type[cutlass.Numeric],
    ):
        """Derive cluster layout, the final MMA tiler (with K extent),
        pipeline stage counts, TMEM columns, and the per-CTA tile shape.

        Must run before smem layouts are built in ``__call__``.
        """
        self.cluster_shape_mnk = (*self.cluster_shape_mn, 1)
        self.cluster_layout_vmnk = cute.tiled_divide(
            cute.make_layout(self.cluster_shape_mnk), (tiled_mma.thr_id.shape,)
        )

        mma_inst_shape_k = cute.size(tiled_mma.shape_mnk, mode=[2])
        # it requires k-mode to be 128B aligned
        mma_inst_tile_k: int = 4
        self.mma_tiler = (
            self.mma_tiler[0],
            self.mma_tiler[1],
            mma_inst_shape_k * mma_inst_tile_k,
        )

        self.num_acc_stage, self.num_ab_stage, self.num_epi_stage_per_tile = (
            self._compute_stages(tiled_mma, self.mma_tiler, a_dtype, b_dtype)
        )
        # Accumulator stages must fit in tensor memory (512 columns on SM100).
        self.tmem_alloc_cols = self.num_acc_stage * self.mma_tiler[1]
        assert self.tmem_alloc_cols <= SM100_TMEM_CAPACITY_COLUMNS

        # Per-CTA tile: M is divided across the CTAs participating in the
        # MMA (2 in 2-CTA mode, 1 otherwise).
        self.cta_tile_shape_mnk = (
            self.mma_tiler[0] // cute.size(tiled_mma.thr_id.shape),
            self.mma_tiler[1],
            self.mma_tiler[2],
        )
+
+ @cute.kernel
+ def kernel(
+ self,
+ split_idx: cutlass.Int32,
+ tiled_mma: cute.TiledMma,
+ tma_atom_a: cute.CopyAtom,
+ mA: cute.Tensor,
+ tma_atom_b: cute.CopyAtom,
+ mB: cute.Tensor,
+ mLabels: cute.Tensor,
+ mDlogprobs: cute.Tensor,
+ mMaximum: cute.Tensor,
+ mAccu: cute.Tensor,
+ mDlogits_partial: cute.Tensor,
+ scalarNumValidTokens: cute.Pointer,
+ ignore_index: cutlass.Int64,
+ a_smem_layout_staged: cute.ComposedLayout,
+ b_smem_layout_staged: cute.ComposedLayout,
+ cluster_layout_vmnk: cute.Layout,
+ problem_mnk: Tuple[int, int, int],
+ rank: cutlass.Int32,
+ ) -> None:
+ """
+ The backward kernel for partial d_logits.
+ """
+ warp_idx = cute.arch.make_warp_uniform(cute.arch.warp_idx())
+ tidx, _, _ = cute.arch.thread_idx()
+ bidx, bidy, _ = cute.arch.block_idx()
+ # FIXME: block swizzling applied here
+ pidm, pidn = bidx, bidy
+
+ # FIXME: if 2 CTAs, modify here
+ cta_rank_in_cluster = 0
+ block_in_cluster_coord_vmnk = cluster_layout_vmnk.get_flat_coord(cta_rank_in_cluster)
+
+ # prefetch tma descriptors
+ if warp_idx == self.load_warp_ids:
+ cute.nvgpu.cpasync.prefetch_descriptor(tma_atom_a)
+ cute.nvgpu.cpasync.prefetch_descriptor(tma_atom_b)
+
+ smem = utils.SmemAllocator()
+ storage = smem.allocate(self.shared_storage)
+
+ ab_pipeline = pipeline.PipelineTmaUmma.create(
+ num_stages=self.num_ab_stage,
+ producer_group=make_thread_cooperative_group(len([self.load_warp_ids])),
+ consumer_group=make_thread_cooperative_group(len([self.mma_warp_ids])),
+ tx_count=self.tma_copy_ab_bytes,
+ barrier_storage=storage.load_ab_mbar_ptr.data_ptr(),
+ )
+ ab_producer_state = pipeline.make_pipeline_state(
+ pipeline.PipelineUserType.Producer, self.num_ab_stage
+ )
+ ab_consumer_state = pipeline.make_pipeline_state(
+ pipeline.PipelineUserType.Consumer, self.num_ab_stage
+ )
+
+ mma_pipeline = pipeline.PipelineUmmaAsync.create(
+ num_stages=self.num_acc_stage,
+ producer_group=make_thread_cooperative_group(len([self.mma_warp_ids])),
+ consumer_group=make_thread_cooperative_group(
+ self.threads_per_warp * len(self.epi_warp_ids)
+ ),
+ barrier_storage=storage.mma_mbar_ptr.data_ptr(),
+ )
+ mma_producer_state = pipeline.make_pipeline_state(
+ pipeline.PipelineUserType.Producer, self.num_acc_stage
+ )
+ mma_consumer_state = pipeline.make_pipeline_state(
+ pipeline.PipelineUserType.Consumer, self.num_acc_stage
+ )
+
+ tmem_dealloc_mbar_ptr = storage.tmem_dealloc_mbar_ptr.data_ptr()
+ if warp_idx == self.empty_warp_ids[0]:
+ with cute.arch.elect_one():
+ cute.arch.mbarrier_init(
+ tmem_dealloc_mbar_ptr, self.threads_per_warp * len(self.epi_warp_ids)
+ )
+ cute.arch.mbarrier_init_fence()
+
+ # -------- tensor partition ------------ #
+ # swizzle o [(tileM, tileK), loopM, loopK, stage]
+ sA = storage.sA.get_tensor(
+ a_smem_layout_staged.outer, swizzle=a_smem_layout_staged.inner
+ )
+ # swizzle o [(tileN, tileK), loopN, loopK, stage]
+ sB = storage.sB.get_tensor(
+ b_smem_layout_staged.outer, swizzle=b_smem_layout_staged.inner
+ )
+
+ # FIXME: if 2 CTAs, modify here
+ thr_mma = tiled_mma.get_slice(0)
+ # [MMA, loopM, loopK, stage]
+ tCsA = thr_mma.make_fragment_A(sA)
+ # [MMA, loopN, loopK, stage]
+ tCsB = thr_mma.make_fragment_B(sB)
+
+ # [tileM, tileK, loopK]
+ gA = cute.local_tile(
+ mA, (self.cta_tile_shape_mnk[0], self.cta_tile_shape_mnk[2]), (pidm, None)
+ )
+ # [vocab_per_split, dim]
+ mB_n = cute.local_tile(
+ mB, (self.vocab_per_split, cute.size(mB.layout.shape, mode=[1])), (split_idx, 0)
+ )
+ # [tileN, tileK, loopK]
+ gB = cute.local_tile(
+ mB_n, (self.cta_tile_shape_mnk[1], self.cta_tile_shape_mnk[2]), (pidn, None)
+ )
+
+ a_cta_layout = cute.make_layout(cute.slice_(cluster_layout_vmnk, (0, 0, None, 0)).shape)
+ # just to make sure SMEM and GMEM tensor has the same size in the first rank
+ tCgA = thr_mma.partition_A(gA)
+ tCgB = thr_mma.partition_B(gB)
+ # [CPY, stage] & [CPY, loopK]
+ tTMAsA, tTMAgA = cpasync.tma_partition(
+ tma_atom_a,
+ block_in_cluster_coord_vmnk[2], # cta_coord,
+ a_cta_layout,
+ cute.group_modes(sA, 0, 3),
+ cute.group_modes(tCgA, 0, 3),
+ )
+ b_cta_layout = cute.make_layout(cute.slice_(cluster_layout_vmnk, (0, None, 0, 0)).shape)
+ # [CPY, stage] & [CPY, loopK]
+ tTMAsB, tTMAgB = cpasync.tma_partition(
+ tma_atom_b,
+ block_in_cluster_coord_vmnk[1], # cta_coord
+ b_cta_layout,
+ cute.group_modes(sB, 0, 3),
+ cute.group_modes(tCgB, 0, 3),
+ )
+
+ # ------ Allocate TMEM ------ #
+ tmem_holding_buf = storage.tmem_holding_buf
+ if warp_idx == self.empty_warp_ids[0]:
+ cute.arch.alloc_tmem(
+ self.tmem_alloc_cols, tmem_holding_buf, is_two_cta=self.use_2cta_instrs
+ )
+ self.cta_sync_barrier.arrive_and_wait()
+ tmem_ptr = cute.arch.retrieve_tmem_ptr(
+ self.acc_dtype, alignment=16, ptr_to_buffer_holding_addr=tmem_holding_buf
+ )
+
+ tmem_shape = (128, self.tmem_alloc_cols)
+ acc_shape = thr_mma.partition_shape_C(tmem_shape)
+ tCtC_fake = thr_mma.make_fragment_C(acc_shape)
+ # [(tileM, tileN), loopM, loopN]
+ tCtC = cute.make_tensor(tmem_ptr, tCtC_fake.layout)
+
+ # ------ Empty ------ #
+ if warp_idx in self.empty_warp_ids:
+ cute.arch.warpgroup_reg_dealloc(self.num_regs_other)
+
+ # ------ Load ------ #
+ if warp_idx == self.load_warp_ids:
+ cute.arch.warpgroup_reg_dealloc(self.num_regs_other)
+
+ for k in cutlass.range(cute.size(gA, mode=[2])):
+ ab_pipeline.producer_acquire(ab_producer_state)
+ cute.copy(
+ tma_atom_a,
+ tTMAgA[(None, k)],
+ tTMAsA[(None, ab_producer_state.index)],
+ tma_bar_ptr=ab_pipeline.producer_get_barrier(ab_producer_state),
+ )
+ cute.copy(
+ tma_atom_b,
+ tTMAgB[(None, k)],
+ tTMAsB[(None, ab_producer_state.index)],
+ tma_bar_ptr=ab_pipeline.producer_get_barrier(ab_producer_state),
+ )
+ ab_pipeline.producer_commit(ab_producer_state)
+ ab_producer_state.advance()
+
+ # ------ MMA ------ #
+ if warp_idx == self.mma_warp_ids:
+ cute.arch.warpgroup_reg_dealloc(self.num_regs_other)
+
+ tiled_mma.set(tcgen05.Field.ACCUMULATE, False)
+ mma_pipeline.producer_acquire(mma_producer_state)
+
+ for k in cutlass.range(cute.size(gA, mode=[2])):
+ ab_pipeline.consumer_wait(ab_consumer_state)
+
+ for kblock_idx in cutlass.range(cute.size(tCsA, mode=[2]), unroll_full=True):
+ cute.gemm(
+ tiled_mma,
+ cute.append_ones(tCtC[(None, None, mma_producer_state.index)]),
+ tCsA[(None, None, kblock_idx, ab_consumer_state.index)],
+ tCsB[(None, None, kblock_idx, ab_consumer_state.index)],
+ cute.append_ones(tCtC[(None, None, mma_producer_state.index)]),
+ )
+ tiled_mma.set(tcgen05.Field.ACCUMULATE, True)
+
+ ab_pipeline.consumer_release(ab_consumer_state)
+ ab_consumer_state.advance()
+
+ mma_pipeline.producer_commit(mma_producer_state)
+ mma_producer_state.advance()
+
+ # ------ EPI ------ #
+ if warp_idx in self.epi_warp_ids:
+ cute.arch.warpgroup_reg_alloc(self.num_regs_epi)
+
+ copy_atom_t2r = sm100_utils.get_tmem_load_op(
+ self.cta_tile_shape_mnk,
+ utils.LayoutEnum.ROW_MAJOR,
+ self.acc_dtype,
+ self.acc_dtype,
+ (self.epi_tile[0], self.epi_tile[1] // self.num_epi_stage_per_tile),
+ self.use_2cta_instrs,
+ )
+ # [tileM, subTileN, loopM, CntSubTileN, loopN]
+ tAcc_epi = cute.flat_divide(
+ tCtC[((None, None), 0, None)],
+ (self.epi_tile[0], self.epi_tile[1] // self.num_epi_stage_per_tile),
+ )
+ tiled_copy_t2r = tcgen05.make_tmem_copy(
+ copy_atom_t2r, tAcc_epi[(None, None, 0, 0, 0)]
+ )
+ thr_copy_t2r = tiled_copy_t2r.get_slice(tidx)
+ tTMEM_load_tAcc = thr_copy_t2r.partition_S(tAcc_epi)
+ tTMEM_load_tAcc = cute.group_modes(
+ tTMEM_load_tAcc, 3, cute.rank(tTMEM_load_tAcc) - 1
+ )
+
+ # predicates
+ cAcc = cute.make_identity_tensor(self.mma_tiler[:2])
+ tCcAcc = thr_mma.partition_C(cAcc)
+ tCcAcc_epi = cute.flat_divide(
+ tCcAcc[((None, None), 0, None)],
+ (self.epi_tile[0], self.epi_tile[1] // self.num_epi_stage_per_tile),
+ )
+ tTMEM_load_cAcc = thr_copy_t2r.partition_D(tCcAcc_epi)
+ tTMEM_load_cAcc_shape = cute.select(tTMEM_load_cAcc.shape, mode=[0, 1, 2])
+ tTMEM_load_rAcc = cute.make_fragment(tTMEM_load_cAcc_shape, self.acc_dtype)
+
+ copy_atom_g2r_int64 = cute.make_copy_atom(
+ cute.nvgpu.CopyUniversalOp(), mLabels.element_type
+ )
+ copy_atom_g2r_fp32 = cute.make_copy_atom(
+ cute.nvgpu.CopyUniversalOp(), mDlogprobs.element_type
+ )
+ epilogue_thread_layout = cute.make_layout((128, 1), stride=(1, 1))
+ tiled_copy_g2r_int64 = cute.make_tiled_copy_tv(
+ copy_atom_g2r_int64, epilogue_thread_layout, cute.make_layout((1, 1))
+ )
+ tiled_copy_g2r_fp32 = cute.make_tiled_copy_tv(
+ copy_atom_g2r_fp32, epilogue_thread_layout, cute.make_layout((1, 1))
+ )
+ thr_copy_g2r_int64 = tiled_copy_g2r_int64.get_slice(tidx)
+ thr_copy_g2r_fp32 = tiled_copy_g2r_fp32.get_slice(tidx)
+
+ # [tileM]
+ gLabels = cute.local_tile(mLabels, (self.epi_tile[0],), (pidm,))
+ gMaximum = cute.local_tile(mMaximum, (self.epi_tile[0],), (pidm,))
+ gAccu = cute.local_tile(mAccu, (self.epi_tile[0],), (pidm,))
+
+ # slice along M direction
+ tMCAcc = thr_copy_g2r_int64.partition_S(cAcc)[(None, None, 0)]
+ # [(1, 1), 1]
+ tMCAcc_mask = cute.make_fragment(tMCAcc.shape, cutlass.Boolean)
+ # to align shape with gMax and gAccu
+ tMCAcc_mask = cute.append_ones(tMCAcc_mask)
+ tMCAcc_mask[0] = cute.elem_less(
+ pidm * self.epi_tile[0] + tidx, cute.size(mA, mode=[0])
+ )
+ # [(1, 1), 1, 1]
+ tMgLabels = thr_copy_g2r_int64.partition_S(cute.append_ones(gLabels))
+ tMrLabels = cute.make_fragment(tMgLabels.shape, tMgLabels.element_type)
+ cute.copy(tiled_copy_g2r_int64, tMgLabels, tMrLabels, pred=tMCAcc_mask)
+ tMgMaximum = thr_copy_g2r_fp32.partition_S(cute.append_ones(gMaximum))
+ tMrMaximum = cute.make_fragment(tMgMaximum.layout, tMgMaximum.element_type)
+ cute.copy(tiled_copy_g2r_fp32, tMgMaximum, tMrMaximum, pred=tMCAcc_mask)
+ tMgAccu = thr_copy_g2r_fp32.partition_S(cute.append_ones(gAccu))
+ tMrAccu = cute.make_fragment(tMgAccu.layout, tMgAccu.element_type)
+ cute.copy(tiled_copy_g2r_fp32, tMgAccu, tMrAccu, pred=tMCAcc_mask)
+
+ tMrDlogprobs = cute.make_fragment(tMgAccu.layout, mDlogprobs.element_type)
+ if cutlass.const_expr(self.REDUCTION == 2):
+ # mean reduction
+ num_valid_tokens = cute.make_tensor(scalarNumValidTokens, layout=(1,))
+ tMrDlogprobs[0] = mDlogprobs[0] / num_valid_tokens[0].to(cutlass.Float32)
+ elif cutlass.const_expr(self.REDUCTION == 1):
+ # sum reduction
+ tMrDlogprobs[0] = mDlogprobs[0]
+ else:
+ # no reduction
+ gDlogprobs = cute.local_tile(mDlogprobs, (self.epi_tile[0],), (pidm,))
+ tMgDlogprobs = thr_copy_g2r_fp32.partition_S(cute.append_ones(gDlogprobs))
+ cute.copy(tiled_copy_g2r_fp32, tMgDlogprobs, tMrDlogprobs, pred=tMCAcc_mask)
+
+ tMrAccu[0] = cute.arch.rcp_approx(tMrAccu[0])
+ tMrDlogprobs[0] *= tMrLabels[0] != ignore_index
+ tMr_d_acc_exp_logits = tMrDlogprobs[0] * tMrAccu[0]
+
+ # ------ Partial output ------ #
+ # [tileM, tileN]
+ gDlogits_partial = cute.local_tile(
+ mDlogits_partial, (self.epi_tile[0], self.epi_tile[1]), (pidm, pidn)
+ )
+ # blackwell supports STG.256
+ copy_atom_r2g = cute.make_copy_atom(
+ cute.nvgpu.CopyUniversalOp(),
+ gDlogits_partial.element_type,
+ num_bits_per_copy=256,
+ )
+ tiled_copy_r2g = cute.make_tiled_copy_tv(
+ copy_atom_r2g, epilogue_thread_layout, copy_atom_r2g.layout_dst_tv
+ )
+ thr_copy_r2g = tiled_copy_r2g.get_slice(tidx)
+
+ # [CPY, loopM, loopN]
+ tR2GCAcc = thr_copy_r2g.partition_S(cAcc)
+ tR2GCAcc_pred = cute.make_fragment(tR2GCAcc.shape, cutlass.Boolean)
+ for elem in cutlass.range(cute.size(tR2GCAcc_pred, mode=[0])):
+ for row in cutlass.range(cute.size(tR2GCAcc_pred, mode=[1])):
+ for col in cutlass.range(cute.size(tR2GCAcc_pred, mode=[2])):
+ tR2GCAcc_pred[elem, row, col] = cute.elem_less(
+ pidm * self.epi_tile[0] + tR2GCAcc[elem, row, col][0],
+ problem_mnk[0],
+ ) and cute.elem_less(
+ split_idx * self.vocab_per_split
+ + pidn * self.epi_tile[1]
+ + tR2GCAcc[elem, row, col][1],
+ problem_mnk[1],
+ )
+
+ tR2GgDlogits = thr_copy_r2g.partition_D(gDlogits_partial)
+
+ # for type conversion
+ dLogits_half = cute.make_fragment(tTMEM_load_rAcc.shape, tR2GgDlogits.element_type)
+ dLogits_half = cute.tiled_divide(
+ dLogits_half, (cute.size(tR2GgDlogits, mode=[0]), 1)
+ )
+ dLogits_half = cute.group_modes(dLogits_half, 2, cute.rank(dLogits_half))
+
+ mma_pipeline.consumer_wait(mma_consumer_state)
+
+ block_vocab_left_idx: cutlass.Int64 = (
+ split_idx * self.vocab_per_split + pidn * self.epi_tile[1]
+ )
+ block_vocab_right_idx: cutlass.Int64 = min(
+ split_idx * self.vocab_per_split + (pidn + 1) * self.epi_tile[1],
+ min((split_idx + 1) * self.vocab_per_split, problem_mnk[1]),
+ )
+ num_n_subtiles: cutlass.Int64 = cute.ceil_div(
+ (block_vocab_right_idx - block_vocab_left_idx),
+ cute.size(tTMEM_load_rAcc, mode=[0]),
+ )
+ for n_subtile in cutlass.range(num_n_subtiles):
+ cute.copy(
+ tiled_copy_t2r,
+ tTMEM_load_tAcc[(None, None, None, n_subtile, mma_consumer_state.index)],
+ tTMEM_load_rAcc,
+ )
+
+ for idx in cutlass.range(
+ cute.size(tTMEM_load_rAcc, mode=[0]), unroll_full=True
+ ):
+ # exp_logits
+ tTMEM_load_rAcc[idx] = cute.exp(tTMEM_load_rAcc[idx] - tMrMaximum[0])
+
+ position: cutlass.Int64 = (
+ rank * problem_mnk[1]
+ + split_idx * self.vocab_per_split
+ + pidn * self.epi_tile[1]
+ + n_subtile * cute.size(tTMEM_load_rAcc, mode=[0])
+ + idx
+ )
+ mask: cutlass.Boolean = (
+ position == tMrLabels[0] and tMrLabels[0] != ignore_index
+ )
+ # d_logits
+ tTMEM_load_rAcc[idx] *= tMr_d_acc_exp_logits
+ tTMEM_load_rAcc[idx] += mask * -tMrDlogprobs[0]
+ dLogits_half[idx] = tTMEM_load_rAcc[idx].to(dLogits_half.element_type)
+
+ for idx in cutlass.range(cute.size(dLogits_half, mode=[1]), unroll_full=True):
+ copy_id = n_subtile * cute.size(dLogits_half, mode=[1]) + idx
+ cute.copy(
+ tiled_copy_r2g,
+ dLogits_half[(None, idx, None)],
+ tR2GgDlogits[(None, None, copy_id)],
+ pred=tR2GCAcc_pred[((0, None), None, copy_id)],
+ )
+
+ mma_pipeline.consumer_release(mma_consumer_state)
+ mma_consumer_state.advance()
+
+ # ------ Deallocate TMEM ------ #
+ self.cta_sync_barrier.arrive_and_wait()
+ if warp_idx == self.empty_warp_ids[0]:
+ cute.arch.relinquish_tmem_alloc_permit()
+ cute.arch.dealloc_tmem(
+ tmem_ptr, self.tmem_alloc_cols, is_two_cta=self.use_2cta_instrs
+ )
+
+ @cute.jit
+ def __call__(
+ self,
+ split_idx: cutlass.Int32,
+ hidden: cute.Tensor,
+ weight: cute.Tensor,
+ labels: cute.Tensor,
+ dlogprobs: cute.Tensor,
+ maximum: cute.Tensor,
+ accu: cute.Tensor,
+ dlogits_partial: cute.Tensor,
+ scalarNumValidTokens: cute.Pointer,
+ ignore_index: cutlass.Int64,
+ rank: cutlass.Int32,
+ stream: cuda.CUstream,
+ ) -> None:
+ a_dtype: Type[cutlass.Numeric] = hidden.element_type
+ b_dtype: Type[cutlass.Numeric] = weight.element_type
+
+ if cutlass.const_expr(hidden.element_type != weight.element_type):
+ raise RuntimeError(
+ f"data type don't match: {hidden.element_type} v.s. {weight.element_type}"
+ )
+ if cutlass.const_expr(hidden.element_type not in [cutlass.Float16, cutlass.BFloat16]):
+ raise RuntimeError("hidden can only be FP16 or BF16")
+ if cutlass.const_expr(hidden.layout.shape[1] != weight.layout.shape[1]):
+ raise RuntimeError("K dimension doesn't match")
+
+ problem_mnk = (hidden.layout.shape[0], weight.layout.shape[0], hidden.layout.shape[1])
+ if cutlass.const_expr((problem_mnk[2] * a_dtype.width // 8) % 16 != 0):
+ raise RuntimeError(f"K dimension is not 16B aligned: {problem_mnk[2]}")
+ if cutlass.const_expr((problem_mnk[2] * b_dtype.width // 8) % 128 != 0):
+ raise RuntimeError(f"N dimension is not 128B aligned: {problem_mnk[1]}")
+
+ grid = self._compute_grid(
+ problem_mnk=problem_mnk,
+ cluster_shape_mn=self.cluster_shape_mn,
+ cta_tiler=self.mma_tiler,
+ )
+
+ a_major_mode = utils.LayoutEnum.from_tensor(hidden).mma_major_mode()
+ b_major_mode = utils.LayoutEnum.from_tensor(weight).mma_major_mode()
+
+ tiled_mma = sm100_utils.make_trivial_tiled_mma(
+ a_dtype,
+ a_major_mode,
+ b_major_mode,
+ self.acc_dtype,
+ self.cta_group,
+ self.mma_tiler[:2],
+ )
+ self._setup_attributes(tiled_mma, a_dtype, b_dtype)
+
+ self.epi_tile = self.cta_tile_shape_mnk[:2]
+
+ # Swizzle o [(tileM, tileK), loopM, loopK, stage]
+ a_smem_layout_staged = sm100_utils.make_smem_layout_a(
+ tiled_mma, self.mma_tiler, a_dtype, self.num_ab_stage
+ )
+ # Swizzle o [(tileN, tileK), loopN, loopK, stage]
+ b_smem_layout_staged = sm100_utils.make_smem_layout_b(
+ tiled_mma, self.mma_tiler, b_dtype, self.num_ab_stage
+ )
+ tma_load_op = cpasync.CopyBulkTensorTileG2SOp(self.cta_group)
+ tma_store_op = cpasync.CopyBulkTensorTileS2GOp()
+
+ # Swizzle o [(tileM, tileK), loopM, loopK]
+ a_smem_layout = cute.select(a_smem_layout_staged, mode=[0, 1, 2])
+ tma_atom_a, tma_tensor_a = cute.nvgpu.make_tiled_tma_atom_A(
+ tma_load_op,
+ hidden,
+ a_smem_layout,
+ self.mma_tiler,
+ tiled_mma,
+ self.cluster_layout_vmnk.shape,
+ )
+ # Swizzle o [(tileN, tileK), loopN, loopK]
+ b_smem_layout = cute.select(b_smem_layout_staged, mode=[0, 1, 2])
+ tma_atom_b, tma_tensor_b = cute.nvgpu.make_tiled_tma_atom_B(
+ tma_load_op,
+ weight,
+ b_smem_layout,
+ self.mma_tiler,
+ tiled_mma,
+ self.cluster_layout_vmnk.shape,
+ )
+ a_copy_size = cute.size_in_bytes(a_dtype, a_smem_layout)
+ b_copy_size = cute.size_in_bytes(b_dtype, b_smem_layout)
+ self.tma_copy_ab_bytes = a_copy_size + b_copy_size
+
+ @cute.struct
+ class SharedStorage:
+ """
+ The shared storage for the backward kernel.
+ """
+
+ load_ab_mbar_ptr: cute.struct.MemRange[cutlass.Int64, self.num_ab_stage * 2]
+ mma_mbar_ptr: cute.struct.MemRange[cutlass.Int64, self.num_acc_stage * 2]
+
+ tmem_dealloc_mbar_ptr: cute.struct.MemRange[cutlass.Int64, 1]
+ tmem_holding_buf: cutlass.Int32
+
+ sA: cute.struct.Align[
+ cute.struct.MemRange[a_dtype, cute.cosize(a_smem_layout_staged)],
+ self.buffer_align_bytes,
+ ]
+ sB: cute.struct.Align[
+ cute.struct.MemRange[b_dtype, cute.cosize(b_smem_layout_staged)],
+ self.buffer_align_bytes,
+ ]
+
+ self.shared_storage = SharedStorage
+
+ self.kernel(
+ split_idx,
+ tiled_mma,
+ tma_atom_a,
+ tma_tensor_a,
+ tma_atom_b,
+ tma_tensor_b,
+ labels,
+ dlogprobs,
+ maximum,
+ accu,
+ dlogits_partial,
+ scalarNumValidTokens,
+ ignore_index,
+ a_smem_layout_staged,
+ b_smem_layout_staged,
+ self.cluster_layout_vmnk,
+ problem_mnk,
+ rank,
+ ).launch(
+ grid=grid,
+ block=[self.threads_per_cta, 1, 1],
+ cluster=self.cluster_shape_mnk,
+ stream=stream,
+ )
+
+except ImportError:
+ logging.warning("Cutlass or CUDA bindings not found. BwdPartialDlogits will not be available.")
diff --git a/megatron/core/fusions/linear_cross_entropy/blackwell/entry.py b/megatron/core/fusions/linear_cross_entropy/blackwell/entry.py
new file mode 100644
index 00000000000..07e018b51ff
--- /dev/null
+++ b/megatron/core/fusions/linear_cross_entropy/blackwell/entry.py
@@ -0,0 +1,480 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+
+import logging
+import os
+import typing
+from dataclasses import dataclass, field
+from functools import lru_cache
+
+try:
+ import cuda.bindings.driver as cuda # type: ignore
+ import cutlass
+ import cutlass.cute as cute
+ import torch
+ import torch.distributed as dist
+ import triton # type: ignore
+ from cutlass.cute.runtime import from_dlpack
+
+ import megatron.core.fusions.linear_cross_entropy.utils as utils
+ from megatron.core.fusions.linear_cross_entropy.blackwell import (
+ bwd_partial_dlogits as bwd_partial_dlogits,
+ )
+ from megatron.core.fusions.linear_cross_entropy.blackwell import fwd_mainloop as fwd_mainloop
+ from megatron.core.fusions.linear_cross_entropy.blackwell import triton as triton_kernels
+
+ @dataclass
+ class FwdConfig:
+ """
+ The configuration for the forward pass.
+ """
+
+ _dedicated_stream: torch.cuda.Stream = field(default_factory=torch.cuda.Stream)
+ _dedicated_events: typing.List[torch.cuda.Event] = field(default_factory=list)
+ _initialized: bool = field(default=False)
+ _fwd_mainloop_kernels: typing.Dict[str, cute.kernel] = field(default_factory=dict)
+ _vocab_per_split: int = field(
+ default=int(os.environ.get("LCE_FWD_VOCAB_SPLIT_SIZE", 512 * 6))
+ )
+
+ @dataclass
+ class BwdConfig:
+ """
+ The configuration for the backward pass.
+ """
+
+ _bwd_kernel: typing.Dict[str, cute.kernel] = field(default_factory=dict)
+ _vocab_per_split: int = field(
+ default=int(os.environ.get("LCE_BWD_VOCAB_SPLIT_SIZE", 512 * 6))
+ )
+ _backward_method: utils.BackwardMethodEnum = field(
+ default=utils.BackwardMethodEnum.kDlogitsSplitN
+ )
+
+ @lru_cache(maxsize=1)
+ def _get_fwd_config() -> FwdConfig:
+ """
+ Helper function to lazy initialize the forward configuration.
+ """
+ return FwdConfig()
+
+ @lru_cache(maxsize=1)
+ def _get_bwd_config() -> BwdConfig:
+ """
+ Helper function to lazy initialize the backward configuration.
+ """
+ return BwdConfig()
+
+ def forward(
+ hidden: torch.Tensor,
+ weight: torch.Tensor,
+ labels: torch.Tensor,
+ tp_group: typing.Optional[torch.distributed.ProcessGroup] = None,
+ reduction: typing.Literal["none", "sum", "mean"] = "mean",
+ ignore_index: int = -100,
+ sequence_parallel: bool = False,
+ ) -> typing.Tuple[
+ torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, int, int, torch.Tensor
+ ]:
+ """
+ forward host function
+ """
+ tp_rank = 0 if tp_group is None else torch.distributed.get_rank(tp_group)
+ tp_world_size = 1 if tp_group is None else torch.distributed.get_world_size(tp_group)
+ in_tp_mode = (tp_group is not None) and (tp_world_size > 1)
+
+ assert hidden.is_cuda and weight.is_cuda and labels.is_cuda
+ assert weight.device == hidden.device and labels.device == hidden.device
+
+ # hidden could be [batch, seqlen, dim] or [seqlen, batch, dim] or [tokens, dim]
+ assert hidden.dim() == 2 or hidden.dim() == 3
+ # weight must be [vocab_size, dim]
+ assert weight.dim() == 2
+ # labels could be [batch, seqlen] or [seqlen, batch] or [tokens]
+ assert (hidden.dim() == 2 and labels.dim() == 1) or (
+ hidden.dim() == 3 and labels.dim() == 2
+ )
+ assert hidden.is_contiguous() and weight.is_contiguous() and labels.is_contiguous()
+
+ hidden_view = hidden.view(-1, hidden.shape[-1])
+ labels_view = labels.view(-1)
+
+ assert (
+ sequence_parallel and hidden_view.shape[0] * tp_world_size == labels_view.shape[0]
+ ) or (not sequence_parallel and hidden_view.shape[0] == labels_view.shape[0])
+ assert hidden_view.shape[1] == weight.shape[1]
+
+ global_hidden = hidden
+ if in_tp_mode and sequence_parallel:
+ partial_hidden_shape = hidden.shape
+ global_hidden_shape = (
+ partial_hidden_shape[0] * tp_world_size,
+ *partial_hidden_shape[1:],
+ )
+ global_hidden = torch.empty(
+ global_hidden_shape, dtype=hidden.dtype, device=hidden.device
+ )
+ dist.all_gather_into_tensor(global_hidden, hidden, group=tp_group)
+ assert global_hidden.is_contiguous()
+ hidden_view = global_hidden.view(-1, global_hidden.shape[-1])
+
+ num_tokens, dim = hidden_view.shape
+ vocab_size, _ = weight.shape
+
+ if not _get_fwd_config()._initialized:
+ _get_fwd_config()._dedicated_stream = torch.cuda.Stream(hidden.device)
+ _get_fwd_config()._dedicated_events = [torch.cuda.Event() for _ in range(2)]
+ _get_fwd_config()._initialized = True
+
+ REDUCTION = utils.str_to_reduction_enum(reduction)
+ # declare logprobs
+ if REDUCTION == utils.EntropyReductionEnum.kNone:
+ logprobs = torch.empty((num_tokens,), device=hidden.device, dtype=torch.float32)
+ if in_tp_mode:
+ logprobs.zero_()
+ else:
+ logprobs = torch.zeros((), device=hidden.device, dtype=torch.float32)
+ # declare auxiliary tensors
+ maximum = torch.empty((num_tokens,), device=hidden.device, dtype=torch.float32)
+ accumulate = torch.empty_like(maximum, dtype=torch.float32)
+ num_valid_tokens = torch.empty((), device=hidden.device, dtype=torch.int64)
+ assert (
+ maximum.is_contiguous()
+ and accumulate.is_contiguous()
+ and num_valid_tokens.is_contiguous()
+ )
+ # declare intermediate tensors
+ # NOTE: this is a parameter for tuning
+ num_splits = (
+ vocab_size + _get_fwd_config()._vocab_per_split - 1
+ ) // _get_fwd_config()._vocab_per_split
+ _max = torch.empty((num_tokens, num_splits), device=hidden.device, dtype=torch.float32)
+ _accu = torch.empty((num_tokens, num_splits), device=hidden.device, dtype=torch.float32)
+ if REDUCTION == utils.EntropyReductionEnum.kNone:
+ _logprobs = logprobs
+ else:
+ _logprobs = torch.empty((num_tokens,), device=hidden.device, dtype=torch.float32)
+ if in_tp_mode:
+ _logprobs.zero_()
+ assert _max.is_contiguous() and _accu.is_contiguous() and _logprobs.is_contiguous()
+
+ triton_kernels.get_num_valid_tokens[(1,)](
+ num_tokens, ignore_index, labels_view, labels_view.stride(0), num_valid_tokens
+ )
+
+ # need to compile the kernel for the first time
+ hidden_packed = from_dlpack(
+ hidden_view.detach(), assumed_align=16
+ ).mark_compact_shape_dynamic(mode=0)
+ weight_packed = from_dlpack(weight.detach(), assumed_align=16)
+ labels_packed = from_dlpack(
+ labels_view.detach(), assumed_align=8
+ ).mark_compact_shape_dynamic(mode=0)
+ logprobs_packed = from_dlpack(_logprobs, assumed_align=16).mark_compact_shape_dynamic(
+ mode=0
+ )
+ _max_packed = from_dlpack(_max, assumed_align=8).mark_compact_shape_dynamic(
+ mode=0, stride_order=(0, 1)
+ )
+ _accu_packed = from_dlpack(_accu, assumed_align=8).mark_compact_shape_dynamic(
+ mode=0, stride_order=(0, 1)
+ )
+ cuda_stream = cuda.CUstream(torch.cuda.current_stream().cuda_stream)
+
+ # VocabSize and Dim are fixed for a given model,
+ # only the number of tokens can vary
+ key = f"vocab_size:{vocab_size}+dim:{dim}+dtype:{hidden_view.dtype}"
+ if _get_fwd_config()._fwd_mainloop_kernels.get(key) is None:
+ fwd_mainloop_kernel = fwd_mainloop.FwdMainLoop(
+ vocab_per_split=_get_fwd_config()._vocab_per_split
+ )
+ fwd_mainloop_compiled_kernel = cute.compile(
+ fwd_mainloop_kernel,
+ hidden_packed,
+ weight_packed,
+ labels_packed,
+ logprobs_packed,
+ _max_packed,
+ _accu_packed,
+ ignore_index,
+ tp_rank,
+ cuda_stream,
+ )
+ _get_fwd_config()._fwd_mainloop_kernels[key] = fwd_mainloop_compiled_kernel
+ else:
+ fwd_mainloop_compiled_kernel = _get_fwd_config()._fwd_mainloop_kernels[key]
+ fwd_mainloop_compiled_kernel(
+ hidden_packed,
+ weight_packed,
+ labels_packed,
+ logprobs_packed,
+ _max_packed,
+ _accu_packed,
+ ignore_index,
+ tp_rank,
+ cuda_stream,
+ )
+
+ if not in_tp_mode:
+
+ def grid(meta):
+ return (triton.cdiv(num_tokens, meta["BLOCK_SIZE_M"]),)
+
+ triton_kernels.forward_dp_epilogue[grid](
+ num_tokens,
+ num_splits,
+ ignore_index,
+ labels_view,
+ labels_view.stride(0),
+ num_valid_tokens,
+ _max,
+ _max.stride(0),
+ _max.stride(1),
+ _accu,
+ _accu.stride(0),
+ _accu.stride(1),
+ maximum,
+ maximum.stride(0),
+ accumulate,
+                accumulate.stride(0),
+ _logprobs,
+ _logprobs.stride(0),
+ logprobs,
+ triton.language.constexpr(REDUCTION.value),
+ )
+ else:
+ _max_backup = _max.clone()
+ dist.all_reduce(_max, op=dist.ReduceOp.MAX, group=tp_group)
+
+ torch.cuda.current_stream().record_event(_get_fwd_config()._dedicated_events[0])
+ with torch.cuda.stream(_get_fwd_config()._dedicated_stream):
+ _get_fwd_config()._dedicated_stream.wait_event(
+ _get_fwd_config()._dedicated_events[0]
+ )
+ dist.all_reduce(_logprobs, op=dist.ReduceOp.SUM, group=tp_group)
+ _get_fwd_config()._dedicated_stream.record_event(
+ _get_fwd_config()._dedicated_events[1]
+ )
+
+ def grid(meta):
+ return (triton.cdiv(num_tokens, meta["BLOCK_SIZE_M"]),)
+
+ triton_kernels.forward_tp_epilogue[grid](
+ num_tokens,
+ num_splits,
+ _max,
+ _max.stride(0),
+ _max.stride(1),
+ _max_backup,
+ _max_backup.stride(0),
+ _max_backup.stride(1),
+ _accu,
+ _accu.stride(0),
+ _accu.stride(1),
+ maximum,
+ maximum.stride(0),
+ accumulate,
+                accumulate.stride(0),
+ )
+ # reduce accumulate
+ dist.all_reduce(accumulate, op=dist.ReduceOp.SUM, group=tp_group)
+
+ # update logprobs
+ torch.cuda.current_stream().wait_event(_get_fwd_config()._dedicated_events[1])
+ triton_kernels.forward_tp_epilogue_update_logprobs[grid](
+ num_tokens,
+ ignore_index,
+ num_valid_tokens,
+ labels_view,
+ labels_view.stride(0),
+ _logprobs,
+ _logprobs.stride(0),
+ maximum,
+ maximum.stride(0),
+ accumulate,
+ accumulate.stride(0),
+ logprobs,
+ REDUCTION.value,
+ )
+
+ return (
+ logprobs,
+ maximum,
+ accumulate,
+ num_valid_tokens,
+ tp_rank,
+ tp_world_size,
+ global_hidden,
+ )
+
+ def backward(
+ dlogprobs: torch.Tensor,
+ global_hidden: torch.Tensor,
+ weight: torch.Tensor,
+ labels: torch.Tensor,
+ maximum: torch.Tensor,
+ accu: torch.Tensor,
+ num_valid_tokens: torch.Tensor,
+ reduction: typing.Literal["none", "sum", "mean"] = "mean",
+ ignore_index: int = -100,
+ tp_group: typing.Optional[dist.ProcessGroup] = None,
+ tp_rank: int = 0,
+ tp_world_size: int = 1,
+ sequence_parallel: bool = False,
+ ) -> typing.Tuple[torch.Tensor, torch.Tensor]:
+ """
+ backward host function
+ """
+ in_tp_mode = (tp_group is not None) and (tp_world_size > 1)
+
+ hidden_view = global_hidden.view(-1, global_hidden.shape[-1])
+ labels_view = labels.view(-1)
+
+ num_tokens, dim = hidden_view.shape
+ vocab_size, _ = weight.shape
+
+ REDUCTION = utils.str_to_reduction_enum(reduction)
+ dlogprobs_view = dlogprobs.view(-1)
+ assert (
+ REDUCTION == utils.EntropyReductionEnum.kNone and dlogprobs.shape == (num_tokens,)
+ ) or (REDUCTION != utils.EntropyReductionEnum.kNone and dlogprobs.dim() == 0)
+ assert dlogprobs.is_contiguous() and dlogprobs.is_cuda
+
+ assert (
+ num_valid_tokens.dim() == 0
+ and num_valid_tokens.is_cuda
+ and num_valid_tokens.dtype == torch.int64
+ )
+
+ # Allocate d_hidden in float32 for better numerical stability
+ d_hidden = torch.empty_like(global_hidden, dtype=torch.float32)
+ d_weight = torch.empty_like(weight)
+ assert d_hidden.is_contiguous() and d_weight.is_contiguous()
+
+ # FIXME: implement different backward methods
+ _backward_method = _get_bwd_config()._backward_method
+ if _backward_method == utils.BackwardMethodEnum.kDlogitsSplitN:
+ vocab_per_split = _get_bwd_config()._vocab_per_split
+ num_splits = (vocab_size + vocab_per_split - 1) // vocab_per_split
+
+ _d_logits = torch.empty(
+ (num_tokens, vocab_per_split),
+ device=global_hidden.device,
+ dtype=global_hidden.dtype,
+ )
+
+ hidden_packed = from_dlpack(
+ hidden_view.detach(), assumed_align=16
+ ).mark_compact_shape_dynamic(mode=0)
+ weight_packed = from_dlpack(weight.detach(), assumed_align=16)
+ labels_packed = from_dlpack(
+ labels_view.detach(), assumed_align=8
+ ).mark_compact_shape_dynamic(mode=0)
+ dlogprobs_packed = from_dlpack(
+ dlogprobs_view.detach(), assumed_align=8
+ ).mark_compact_shape_dynamic(mode=0)
+ maximum_packed = from_dlpack(
+ maximum.detach(), assumed_align=8
+ ).mark_compact_shape_dynamic(mode=0)
+ accu_packed = from_dlpack(accu.detach(), assumed_align=8).mark_compact_shape_dynamic(
+ mode=0
+ )
+ dlogits_packed = from_dlpack(_d_logits, assumed_align=32).mark_compact_shape_dynamic(
+ mode=0
+ )
+ scalarNumValidTokens_packed = cute.runtime.make_ptr(
+ cutlass.Int64, num_valid_tokens.data_ptr(), cute.AddressSpace.gmem, assumed_align=8
+ )
+
+ stream = cuda.CUstream(torch.cuda.current_stream().cuda_stream)
+
+ key = (
+ f"vocab_size:{vocab_size}+dim:{dim}+reduction:{REDUCTION}+dtype:{hidden_view.dtype}"
+ )
+ if _get_bwd_config()._bwd_kernel.get(key) is None:
+ bwd_kernel = bwd_partial_dlogits.BwdPartialDlogits(
+ reduction=REDUCTION.value, vocab_per_split=vocab_per_split
+ )
+ bwd_kernel_compiled = cute.compile(
+ bwd_kernel,
+ 0, # split_idx
+ hidden_packed,
+ weight_packed,
+ labels_packed,
+ dlogprobs_packed,
+ maximum_packed,
+ accu_packed,
+ dlogits_packed,
+ scalarNumValidTokens_packed,
+ ignore_index,
+ tp_rank,
+ stream,
+ )
+ _get_bwd_config()._bwd_kernel[key] = bwd_kernel_compiled
+ else:
+ bwd_kernel_compiled = _get_bwd_config()._bwd_kernel.get(key)
+
+ for split_idx in range(num_splits):
+ bwd_kernel_compiled(
+ split_idx,
+ hidden_packed,
+ weight_packed,
+ labels_packed,
+ dlogprobs_packed,
+ maximum_packed,
+ accu_packed,
+ dlogits_packed,
+ scalarNumValidTokens_packed,
+ ignore_index,
+ tp_rank,
+ stream,
+ )
+                # Remove padding areas. cuBLAS can handle non-contiguous tensors,
+                # so there is no need to call .contiguous() on the slice before
+                # the matmuls below.
+ vocab_right_bound = (
+ min((split_idx + 1) * vocab_per_split, vocab_size) - split_idx * vocab_per_split
+ )
+ valid_d_logits = _d_logits[:, :vocab_right_bound]
+
+ _delta_hidden = torch.mm(
+ valid_d_logits,
+ weight[split_idx * vocab_per_split : (split_idx + 1) * vocab_per_split, :],
+ out_dtype=torch.float32,
+ ).view_as(d_hidden)
+ if split_idx == 0:
+ d_hidden.copy_(_delta_hidden)
+ else:
+ d_hidden.add_(_delta_hidden)
+ torch.matmul(
+ valid_d_logits.T,
+ hidden_view,
+ out=d_weight[
+ split_idx * vocab_per_split : (split_idx + 1) * vocab_per_split, :
+ ],
+ )
+ else:
+ raise NotImplementedError(f"Unsupported backward method: {_backward_method}")
+
+ if in_tp_mode:
+ dist.all_reduce(d_hidden, op=dist.ReduceOp.SUM, group=tp_group)
+ if sequence_parallel:
+ partial_hidden_shape = (
+ global_hidden.shape[0] // tp_world_size,
+ *global_hidden.shape[1:],
+ )
+ partial_num_tokens = num_tokens // tp_world_size
+ d_hidden = d_hidden.view(-1, d_hidden.shape[-1])[
+ tp_rank * partial_num_tokens : (tp_rank + 1) * partial_num_tokens, :
+ ]
+ d_hidden = d_hidden.view(partial_hidden_shape).clone()
+
+ # convert d_hidden to the original dtype
+ d_hidden = d_hidden.type_as(global_hidden)
+
+ return d_hidden, d_weight
+
+except ImportError:
+ logging.warning(
+ "Cutlass or CUDA bindings not found. LinearCrossEntropy Blackwell entry "
+ "points will not be available."
+ )
diff --git a/megatron/core/fusions/linear_cross_entropy/blackwell/fwd_mainloop.py b/megatron/core/fusions/linear_cross_entropy/blackwell/fwd_mainloop.py
new file mode 100644
index 00000000000..93f5b9523e7
--- /dev/null
+++ b/megatron/core/fusions/linear_cross_entropy/blackwell/fwd_mainloop.py
@@ -0,0 +1,693 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+
+"""
+Implementations of the fusion lm_head(Linear) + Cross-Entropy kernel
+"""
+
+import logging
+from typing import Tuple, Type
+
+try:
+ import cuda.bindings.driver as cuda # type: ignore
+ import cutlass
+ import cutlass.cute as cute
+ import cutlass.pipeline as pipeline # type: ignore
+ import cutlass.utils as utils # type: ignore
+ import cutlass.utils.blackwell_helpers as sm100_utils # type: ignore
+ from cutlass.cute.nvgpu import cpasync, tcgen05
+
+ SM100_TMEM_CAPACITY_COLUMNS: int = 512
+
+ def make_thread_cooperative_group(size: int):
+ """
+ Create a thread cooperative group.
+ """
+ return pipeline.CooperativeGroup(pipeline.Agent.Thread, size, alignment=size)
+
+ class FwdMainLoop:
+ """
+ This class implements the mainloop for forward process.
+
+ Traits stored as attributes.
+
+ :param acc_dtype:
+ """
+
+ def __init__(
+ self,
+ acc_dtype: Type[cutlass.Numeric] = cutlass.Float32,
+ use_2cta_instrs: bool = False,
+ mma_tiler_mn: Tuple[int, int] = (128, 256),
+ vocab_per_split: int = 512,
+ ):
+ """
+ Configuration including:
+ - MMA instruction settings
+ - Cluster Shape
+ """
+ self.acc_dtype: Type[cutlass.Numeric] = acc_dtype
+ self.use_2cta_instrs = use_2cta_instrs
+ # This is the shape covered by tiledMMA, not just single MMA instruction
+ self.mma_tiler = (*mma_tiler_mn, 1)
+ self.cta_tiler = (self.mma_tiler[0], vocab_per_split, self.mma_tiler[2])
+ self.vocab_per_split = vocab_per_split
+
+ self.cta_group = tcgen05.CtaGroup.TWO if self.use_2cta_instrs else tcgen05.CtaGroup.ONE
+ self.cluster_shape_mn = (2, 1) if self.use_2cta_instrs else (1, 1)
+
+ self.occupancy = 1
+ # query SMEM capacity
+ self.smem_capacity = utils.get_smem_capacity_in_bytes("sm_100")
+
+            # An MMA accumulator can span at most 256 TMEM columns; since there is only
+            # one GEMM, the full TMEM capacity can be devoted to its accumulator tiles
+            # (512 columns = 2 stages * 256 columns).
+
+ self.threads_per_warp: int = 32
+ # 1 warp for loading, 1 warp for issuing MMA, 1 WG for storing
+ self.epi_warp_ids = (0, 1, 2, 3)
+ self.load_warp_ids = 4
+ self.mma_warp_ids = 5
+ self.empty_warp_ids = (6, 7)
+
+ self.threads_per_cta: int = self.threads_per_warp * len(
+ (*self.epi_warp_ids, self.load_warp_ids, self.mma_warp_ids, *self.empty_warp_ids)
+ )
+
+ self.cta_sync_barrier = pipeline.NamedBarrier(
+ barrier_id=1, num_threads=self.threads_per_cta
+ )
+ self.tmem_alloc_barrier = pipeline.NamedBarrier(
+ barrier_id=2, num_threads=self.threads_per_cta
+ )
+
+ self.buffer_align_bytes: int = 1024
+ self.num_regs_other: int = 32
+ self.num_regs_epi: int = 192
+
+ def _compute_stages(
+ self,
+ tiled_mma: cute.TiledMma,
+ mma_tiler: Tuple[int, int, int],
+ a_dtype: Type[cutlass.Numeric],
+ b_dtype: Type[cutlass.Numeric],
+ ):
+ a_smem_layout_stage_one = sm100_utils.make_smem_layout_a(
+ tiled_mma, mma_tiler, a_dtype, 1 # only single stage
+ )
+ b_smem_layout_stage_one = sm100_utils.make_smem_layout_b(
+ tiled_mma, mma_tiler, b_dtype, 1
+ )
+ a_bytes_per_stage = cute.size_in_bytes(a_dtype, a_smem_layout_stage_one)
+ b_bytes_per_stage = cute.size_in_bytes(b_dtype, b_smem_layout_stage_one)
+ num_acc_stage = 2
+ num_a_stage = 4
+ num_b_stage = 4
+ num_epi_stage_per_tile = 4
+
+ return num_acc_stage, num_a_stage, num_b_stage, num_epi_stage_per_tile
+
+ def _setup_attributes(
+ self,
+ tiled_mma: cute.TiledMma,
+ a_dtype: Type[cutlass.Numeric],
+ b_dtype: Type[cutlass.Numeric],
+ ):
+ self.cluster_shape_mnk = (*self.cluster_shape_mn, 1)
+ self.cluster_layout_vmnk = cute.tiled_divide(
+ cute.make_layout(self.cluster_shape_mnk), (tiled_mma.thr_id.shape,)
+ )
+
+ # this is fixed for dense MMA, k=16
+ mma_inst_shape_k = cute.size(tiled_mma.shape_mnk, mode=[2])
+ # 16*4 = 64; 64 * sizeof(FP16) = 128Bytes
+ mma_inst_tile_k: int = 4
+ self.mma_tiler = (
+ self.mma_tiler[0],
+ self.mma_tiler[1],
+ mma_inst_shape_k * mma_inst_tile_k,
+ )
+
+ self.num_acc_stage, self.num_a_stage, self.num_b_stage, self.num_epi_stage_per_tile = (
+ self._compute_stages(tiled_mma, self.mma_tiler, a_dtype, b_dtype)
+ )
+ self.tmem_alloc_cols = self.num_acc_stage * self.mma_tiler[1]
+ assert self.tmem_alloc_cols <= SM100_TMEM_CAPACITY_COLUMNS
+
+ self.cta_tile_shape_mnk = (
+ self.mma_tiler[0] // cute.size(tiled_mma.thr_id.shape),
+ self.mma_tiler[1],
+ self.mma_tiler[2],
+ )
+
+ @cute.kernel
+ def kernel(
+ self,
+ tiled_mma: cute.TiledMma,
+ tma_atom_a: cute.CopyAtom,
+ mA: cute.Tensor,
+ tma_atom_b: cute.CopyAtom,
+ mB: cute.Tensor,
+ mLabels: cute.Tensor,
+ mMax: cute.Tensor,
+ mAccu: cute.Tensor,
+ mLogprobs: cute.Tensor,
+ a_smem_layout_staged: cute.ComposedLayout,
+ b_smem_layout_staged: cute.ComposedLayout,
+ cluster_layout_vmnk: cute.Layout,
+ problem_mnk: Tuple[int, int, int],
+ ignore_index: cutlass.Int64,
+ rank: cutlass.Int32,
+ ):
+ """
+ The forward kernel for the mainloop.
+ """
+ warp_idx = cute.arch.make_warp_uniform(cute.arch.warp_idx())
+ tidx, _, _ = cute.arch.thread_idx()
+ bidx, bidy, _ = cute.arch.block_idx()
+ # FIXME: block swizzling applied here
+ pidm, pidn = bidx, bidy
+
+ # prefetch tma descriptors
+ if warp_idx == self.load_warp_ids:
+ cute.nvgpu.cpasync.prefetch_descriptor(tma_atom_a)
+ cute.nvgpu.cpasync.prefetch_descriptor(tma_atom_b)
+
+ # declare SMEM
+ smem = utils.SmemAllocator()
+ storage = smem.allocate(self.shared_storage)
+
+ ab_pipeline = pipeline.PipelineTmaUmma.create(
+ num_stages=self.num_a_stage,
+ producer_group=make_thread_cooperative_group(len([self.load_warp_ids])),
+ consumer_group=make_thread_cooperative_group(len([self.mma_warp_ids])),
+ tx_count=self.tma_copy_a_bytes + self.tma_copy_b_bytes,
+ barrier_storage=storage.load_ab_mbar_ptr.data_ptr(),
+ )
+ ab_producer_state = pipeline.make_pipeline_state(
+ pipeline.PipelineUserType.Producer, self.num_a_stage
+ )
+ ab_consumer_state = pipeline.make_pipeline_state(
+ pipeline.PipelineUserType.Consumer, self.num_a_stage
+ )
+
+ mma_pipeline = pipeline.PipelineUmmaAsync.create(
+ num_stages=self.num_acc_stage,
+ producer_group=make_thread_cooperative_group(len([self.mma_warp_ids])),
+ consumer_group=make_thread_cooperative_group(
+ self.threads_per_warp * len(self.epi_warp_ids)
+ ),
+ barrier_storage=storage.mma_mbar_ptr.data_ptr(),
+ )
+ mma_producer_state = pipeline.make_pipeline_state(
+ pipeline.PipelineUserType.Producer, self.num_acc_stage
+ )
+ mma_consumer_state = pipeline.make_pipeline_state(
+ pipeline.PipelineUserType.Consumer, self.num_acc_stage
+ )
+
+ tmem_dealloc_mbar_ptr = storage.tmem_dealloc_mbar_ptr.data_ptr()
+ if warp_idx == self.empty_warp_ids[0]:
+ with cute.arch.elect_one():
+ cute.arch.mbarrier_init(
+ tmem_dealloc_mbar_ptr, self.threads_per_warp * len(self.epi_warp_ids)
+ )
+ cute.arch.mbarrier_init_fence()
+
+ # -------- SMEM partition ------------ #
+ # swizzle o [(tileM, tileK), loopM, loopK, Stage]
+ sA = storage.sA.get_tensor(
+ a_smem_layout_staged.outer, swizzle=a_smem_layout_staged.inner
+ )
+ # swizzle o [(tileN, tileK), loopN, loopK, stage]
+ sB = storage.sB.get_tensor(
+ b_smem_layout_staged.outer, swizzle=b_smem_layout_staged.inner
+ )
+
+ # FIXME: if 2 CTAs, modify here
+ thr_mma = tiled_mma.get_slice(0)
+ # [MMA, loopM, loopK, stage]
+ tCsA = thr_mma.make_fragment_A(sA)
+ # [MMA, loopN, loopK, stage]
+ tCsB = thr_mma.make_fragment_B(sB)
+
+ # ---------- GMEM partition ----------- #
+ # [tileM, tileK, loopK]
+ gA = cute.local_tile(mA, (self.mma_tiler[0], self.mma_tiler[2]), (pidm, None))
+
+ # [vocab_size_per_split, dim]
+ mB_n = cute.local_tile(
+ mB, (self.vocab_per_split, cute.size(mB.layout.shape, mode=[1])), (pidn, 0)
+ )
+
+ # [tileN, tileK, loopN, loopK]
+ gB = cute.local_tile(mB_n, (self.mma_tiler[1], self.mma_tiler[2]), (None, None))
+
+ # [MMA, tileCntM, tileCntK, loopK]
+ tCgA = thr_mma.partition_A(gA)
+ # [MMA, tileCntN, tileCntK, loopN, loopK]
+ tCgB = thr_mma.partition_B(gB)
+
+ a_cta_layout = cute.make_layout(cute.slice_(cluster_layout_vmnk, (0, 0, None, 0)).shape)
+ # FIXME: if 2 CTAs, modify here
+ cta_rank_in_cluster = 0
+ block_in_cluster_coord_vmnk = cluster_layout_vmnk.get_flat_coord(cta_rank_in_cluster)
+ tTMAsA, tTMAgA = cpasync.tma_partition(
+ tma_atom_a,
+ block_in_cluster_coord_vmnk[2], # cta_coord,
+ a_cta_layout,
+ cute.group_modes(sA, 0, 3), # SMEM tensor
+ cute.group_modes(tCgA, 0, 3), # GMEM tensor
+ )
+ b_cta_layout = cute.make_layout(cute.slice_(cluster_layout_vmnk, (0, None, 0, 0)).shape)
+ tTMAsB, tTMAgB = cpasync.tma_partition(
+ tma_atom_b,
+ block_in_cluster_coord_vmnk[1], # cta_coord
+ b_cta_layout,
+ cute.group_modes(sB, 0, 3),
+ cute.group_modes(tCgB, 0, 3),
+ )
+
+ # Allocate TMEM
+ tmem_holding_buf = storage.tmem_holding_buf
+ if warp_idx == self.empty_warp_ids[0]:
+ cute.arch.alloc_tmem(
+ self.tmem_alloc_cols, tmem_holding_buf, is_two_cta=self.use_2cta_instrs
+ )
+ self.cta_sync_barrier.arrive_and_wait()
+ tmem_ptr = cute.arch.retrieve_tmem_ptr(
+ self.acc_dtype, alignment=16, ptr_to_buffer_holding_addr=tmem_holding_buf
+ )
+
+ # [(tileM, tileN), loopM, loopN]
+ tmem_shape = (128, self.tmem_alloc_cols)
+ acc_shape = thr_mma.partition_shape_C(tmem_shape)
+ tCtC_fake = thr_mma.make_fragment_C(acc_shape)
+ tCtC = cute.make_tensor(tmem_ptr, tCtC_fake.layout)
+
+ block_vocab_left_idx: cutlass.Int64 = pidn * self.vocab_per_split
+ block_vocab_right_idx: cutlass.Int64 = min(
+ (pidn + 1) * self.vocab_per_split, problem_mnk[1]
+ )
+ num_n_tiles: cutlass.Int64 = cute.ceil_div(
+ (block_vocab_right_idx - block_vocab_left_idx), self.mma_tiler[1]
+ )
+
+ # ///////
+ # empty
+ # ///////
+ if warp_idx in self.empty_warp_ids:
+ cute.arch.warpgroup_reg_dealloc(self.num_regs_other)
+
+ # ///////
+ # load
+ # ///////
+ if warp_idx == self.load_warp_ids:
+ cute.arch.warpgroup_reg_dealloc(self.num_regs_other)
+
+ for n in cutlass.range(num_n_tiles):
+ for k in cutlass.range(cute.size(gA, mode=[2])):
+ ab_pipeline.producer_acquire(ab_producer_state)
+ cute.copy(
+ tma_atom_a,
+ tTMAgA[(None, k)],
+ tTMAsA[(None, ab_producer_state.index)],
+ tma_bar_ptr=ab_pipeline.producer_get_barrier(ab_producer_state),
+ )
+ cute.copy(
+ tma_atom_b,
+ tTMAgB[(None, n, k)],
+ tTMAsB[(None, ab_producer_state.index)],
+ tma_bar_ptr=ab_pipeline.producer_get_barrier(ab_producer_state),
+ )
+ ab_pipeline.producer_commit(ab_producer_state)
+ ab_producer_state.advance()
+
+ # ///////
+ # mma
+ # ///////
+ if warp_idx == self.mma_warp_ids:
+ cute.arch.warpgroup_reg_dealloc(self.num_regs_other)
+
+ for n in cutlass.range(num_n_tiles):
+ # disable accumulate for the first tile
+ tiled_mma.set(tcgen05.Field.ACCUMULATE, False)
+ mma_pipeline.producer_acquire(mma_producer_state)
+
+ for k in cutlass.range(cute.size(gA, mode=[2])):
+ ab_pipeline.consumer_wait(ab_consumer_state)
+
+ for kblock_idx in cutlass.range(
+ cute.size(tCsA, mode=[2]), unroll_full=True
+ ):
+ cute.gemm(
+ tiled_mma,
+ cute.append_ones(tCtC[(None, None, mma_producer_state.index)]),
+ tCsA[(None, None, kblock_idx, ab_consumer_state.index)],
+ tCsB[(None, None, kblock_idx, ab_consumer_state.index)],
+ cute.append_ones(tCtC[(None, None, mma_producer_state.index)]),
+ )
+ # enable accumulate for the next tile
+ tiled_mma.set(tcgen05.Field.ACCUMULATE, True)
+
+ ab_pipeline.consumer_release(ab_consumer_state)
+ ab_consumer_state.advance()
+
+ mma_pipeline.producer_commit(mma_producer_state)
+ mma_producer_state.advance()
+
+ # //////////
+ # epilogue
+ # //////////
+ if warp_idx in self.epi_warp_ids:
+ cute.arch.warpgroup_reg_alloc(self.num_regs_epi)
+
+ # epilog TMEM copy and partition
+ copy_atom_t2r = sm100_utils.get_tmem_load_op(
+ self.cta_tile_shape_mnk,
+ utils.LayoutEnum.ROW_MAJOR, # This is hard-coded
+ self.acc_dtype,
+ self.acc_dtype,
+ (self.epi_tile[0], self.epi_tile[1] // self.num_epi_stage_per_tile),
+ self.use_2cta_instrs,
+ )
+ # [tileM, subTileN, loopM, CntSubTileN, loopN]
+ tAcc_epi = cute.flat_divide(
+ tCtC[((None, None), 0, None)],
+ (self.epi_tile[0], self.epi_tile[1] // self.num_epi_stage_per_tile),
+ )
+ tiled_copy_t2r = tcgen05.make_tmem_copy(
+ copy_atom_t2r, tAcc_epi[(None, None, 0, 0, 0)]
+ )
+ thr_copy_t2r = tiled_copy_t2r.get_slice(tidx)
+ tTMEM_load_tAcc = thr_copy_t2r.partition_S(tAcc_epi)
+ # [(pattern), loopM, loopN, CntTileM, CntTileN]
+ tTMEM_load_tAcc = cute.group_modes(
+ tTMEM_load_tAcc, 3, cute.rank(tTMEM_load_tAcc) - 1
+ )
+
+ cAcc = cute.make_identity_tensor(self.mma_tiler[:2])
+ tCcAcc = thr_mma.partition_C(cAcc)
+ # [tileM, subTileN, loopM, CntSubTileN, CntTileN]
+ tCcAcc_epi = cute.flat_divide(
+ tCcAcc[((None, None), 0, None)],
+ (self.epi_tile[0], self.epi_tile[1] // self.num_epi_stage_per_tile),
+ )
+ tTMEM_load_cAcc = thr_copy_t2r.partition_D(tCcAcc_epi)
+ tTMEM_load_cAcc_shape = cute.select(tTMEM_load_cAcc.shape, mode=[0, 1, 2])
+
+ # epilogue layouts
+ epilogue_thread_layout = cute.make_layout((128, 1))
+ copy_atom_g2r = cute.make_copy_atom(
+ cute.nvgpu.CopyUniversalOp(), mLabels.element_type
+ )
+ tiled_copy_g2r = cute.make_tiled_copy(
+ copy_atom_g2r, epilogue_thread_layout, (128, 1)
+ )
+ thr_copy_g2r = tiled_copy_g2r.get_slice(tidx)
+
+ copy_atom_r2g = cute.make_copy_atom(cute.nvgpu.CopyUniversalOp(), cutlass.Float32)
+ tiled_copy_r2g = cute.make_tiled_copy(
+ copy_atom_r2g, epilogue_thread_layout, (128, 1)
+ )
+ thr_copy_r2g = tiled_copy_r2g.get_slice(tidx)
+
+ # auxiliary tensors
+ # [tileM]
+ gLabels = cute.local_tile(mLabels, (self.epi_tile[0],), (pidm,))
+
+ tLabelsCAcc = thr_copy_g2r.partition_S(cAcc)[(None, None, 0)]
+ tLabelsCAcc_mask = cute.make_fragment(tLabelsCAcc.shape, cutlass.Boolean)
+ # [(1, 1), 1]
+ tLabelsCAcc_mask[0] = cute.elem_less(pidm * self.epi_tile[0] + tidx, problem_mnk[0])
+ # to align shape with gMax and gAccu
+ tLabelsCAcc_mask = cute.append_ones(tLabelsCAcc_mask)
+
+ # [(1, 1), 1, 1]
+ tLabelsgLabels = thr_copy_g2r.partition_S(cute.append_ones(gLabels))
+ tLabelsrLabels = cute.make_fragment(
+ tLabelsgLabels.shape, tLabelsgLabels.element_type
+ )
+ cute.copy(tiled_copy_g2r, tLabelsgLabels, tLabelsrLabels, pred=tLabelsCAcc_mask)
+ valid_mask: cutlass.Boolean = (
+ tLabelsrLabels[0] != ignore_index
+ ) and tLabelsCAcc_mask[0]
+
+ # [tileM, 1]
+ gMax = cute.local_tile(mMax, (self.epi_tile[0], 1), (pidm, pidn))
+ # [(CPYM, CPYN), loopM, loopN]
+ tR2GgMax = thr_copy_r2g.partition_D(gMax)
+ tR2GrMax = cute.make_fragment(tR2GgMax.shape, tR2GgMax.element_type)
+ tR2GrMax.fill(-1e30)
+
+ # [tileM, 1]
+ gAccu = cute.local_tile(mAccu, (self.epi_tile[0], 1), (pidm, pidn))
+ # [(CPYM, CPYN), loopM, loopN]
+ tR2GgAccu = thr_copy_r2g.partition_D(gAccu)
+ tR2GrAccu = cute.make_fragment(tR2GgAccu.shape, tR2GgAccu.element_type)
+ tR2GrAccu.fill(0.0)
+
+ # [tileM, 1]
+ gLogprobs = cute.append_ones(
+ cute.local_tile(mLogprobs, (self.epi_tile[0],), (pidm,))
+ )
+ # [(CPYM, CPYN), loopM, loopN]
+ tR2GgLogprobs = thr_copy_r2g.partition_D(gLogprobs)
+ tR2GrLogprobs = cute.make_fragment(tR2GgLogprobs.shape, tR2GgLogprobs.element_type)
+ tR2GrLogprobs.fill(0.0)
+
+ # [(tileN // num_epi_stage_per_tile, 1), 1, 1]
+ tTMEM_load_rAcc = cute.make_fragment(tTMEM_load_cAcc_shape, self.acc_dtype)
+
+ for n in cutlass.range(num_n_tiles):
+ mma_pipeline.consumer_wait(mma_consumer_state)
+
+ left: cutlass.Int64 = block_vocab_left_idx + n * self.epi_tile[1]
+ right: cutlass.Int64 = min(
+ (n + 1) * self.epi_tile[1] + block_vocab_left_idx, block_vocab_right_idx
+ )
+ num_n_subtiles: cutlass.Int64 = cute.ceil_div(
+ (right - left), cute.size(tTMEM_load_rAcc, mode=[0])
+ )
+ for n_subtile in cutlass.range(num_n_subtiles):
+ cute.copy(
+ tiled_copy_t2r,
+ tTMEM_load_tAcc[
+ (None, None, None, n_subtile, mma_consumer_state.index)
+ ],
+ tTMEM_load_rAcc,
+ )
+
+ for idx in cutlass.range(
+ cute.size(tTMEM_load_rAcc, mode=[0]), unroll_full=True
+ ):
+ local_position: cutlass.Int64 = (
+ n * self.epi_tile[1]
+ + n_subtile * cute.size(tTMEM_load_rAcc, mode=[0])
+ + idx
+ )
+ if (block_vocab_left_idx + local_position) < block_vocab_right_idx:
+ _max_old = tR2GrMax[0]
+ tR2GrMax[0] = cute.arch.fmax(tR2GrMax[0], tTMEM_load_rAcc[idx])
+ exp_logits = cute.exp(tTMEM_load_rAcc[idx] - tR2GrMax[0])
+ coeff = cute.exp(_max_old - tR2GrMax[0])
+ tR2GrAccu[0] = coeff * tR2GrAccu[0] + exp_logits
+
+ position: cutlass.Int64 = (
+ rank * problem_mnk[1]
+ + pidn * self.vocab_per_split
+ + local_position
+ )
+ mask: cutlass.Boolean = valid_mask and (
+ position == tLabelsrLabels[0]
+ )
+ tR2GrLogprobs[0] += mask * tTMEM_load_rAcc[idx]
+
+ mma_pipeline.consumer_release(mma_consumer_state)
+ mma_consumer_state.advance()
+
+ cute.copy(tiled_copy_r2g, tR2GrMax, tR2GgMax, pred=tLabelsCAcc_mask)
+ cute.copy(tiled_copy_r2g, tR2GrAccu, tR2GgAccu, pred=tLabelsCAcc_mask)
+
+ vocab_left_idx: cutlass.Int64 = rank * problem_mnk[1] + pidn * self.vocab_per_split
+ vocab_right_idx: cutlass.Int64 = rank * problem_mnk[1] + min(
+ (pidn + 1) * self.vocab_per_split, problem_mnk[1]
+ )
+ valid: cutlass.Boolean = (
+ tLabelsrLabels[0] >= vocab_left_idx and tLabelsrLabels[0] < vocab_right_idx
+ )
+ tLabelsCAcc_mask[0] &= valid
+
+ cute.copy(tiled_copy_r2g, tR2GrLogprobs, tR2GgLogprobs, pred=tLabelsCAcc_mask)
+
+ # Dealloc TMEM
+ self.cta_sync_barrier.arrive_and_wait()
+ if warp_idx == self.empty_warp_ids[0]:
+ cute.arch.relinquish_tmem_alloc_permit()
+ cute.arch.dealloc_tmem(
+ tmem_ptr, self.tmem_alloc_cols, is_two_cta=self.use_2cta_instrs
+ )
+
+ @staticmethod
+ def _compute_grid(
+ problem_mnk: Tuple[int, int, int],
+ cluster_shape_mn: Tuple[int, int],
+ cta_tiler: Tuple[int, int, int],
+ num_splits: int,
+ ) -> Tuple[int, int, int]:
+
+ cluster_shape = (*cluster_shape_mn, 1)
+
+ grid = cute.round_up(
+ (cute.ceil_div(problem_mnk[0], cta_tiler[0]), num_splits, 1), cluster_shape
+ )
+ return grid
+
+ @cute.jit
+ def __call__(
+ self,
+ hidden: cute.Tensor,
+ weight: cute.Tensor,
+ labels: cute.Tensor,
+ _logprobs: cute.Tensor,
+ _max: cute.Tensor,
+ _accu: cute.Tensor,
+ ignore_index: cutlass.Int64,
+ rank: cutlass.Int32,
+ stream: cuda.CUstream,
+ ) -> None:
+ a_dtype: Type[cutlass.Numeric] = hidden.element_type
+ b_dtype: Type[cutlass.Numeric] = weight.element_type
+
+ if cutlass.const_expr(hidden.element_type != weight.element_type):
+ raise RuntimeError(
+ f"data type don't match: {hidden.element_type} v.s. {weight.element_type}"
+ )
+ if cutlass.const_expr(hidden.element_type not in [cutlass.Float16, cutlass.BFloat16]):
+ raise RuntimeError("hidden can only be FP16 or BF16")
+ if cutlass.const_expr(hidden.layout.shape[1] != weight.layout.shape[1]):
+ raise RuntimeError("K dimension doesn't match")
+
+ problem_mnk = (hidden.layout.shape[0], weight.layout.shape[0], hidden.layout.shape[1])
+ if cutlass.const_expr((problem_mnk[2] * a_dtype.width // 8) % 16 != 0):
+ raise RuntimeError(f"K dimension is not 16B aligned: {problem_mnk[2]}")
+
+ num_splits = cute.ceil_div(problem_mnk[1], self.vocab_per_split)
+
+ grid = self._compute_grid(
+ problem_mnk=problem_mnk,
+ cluster_shape_mn=self.cluster_shape_mn,
+ cta_tiler=self.cta_tiler,
+ num_splits=num_splits,
+ )
+ a_major_mode = utils.LayoutEnum.from_tensor(hidden).mma_major_mode()
+ b_major_mode = utils.LayoutEnum.from_tensor(weight).mma_major_mode()
+
+ tiled_mma = sm100_utils.make_trivial_tiled_mma(
+ a_dtype,
+ a_major_mode,
+ b_major_mode,
+ self.acc_dtype,
+ self.cta_group,
+ self.mma_tiler[:2],
+ )
+
+ self._setup_attributes(tiled_mma, a_dtype, b_dtype)
+ if cutlass.const_expr((problem_mnk[2] * a_dtype.width // 8) % 128 != 0):
+ raise RuntimeError(f"K dimension is not 128B aligned: {problem_mnk[2]}")
+
+ self.epi_tile = self.mma_tiler[:2]
+
+ # Swizzle o [(tileM, tileK), loopM, loopK, stage]
+ a_smem_layout_staged = sm100_utils.make_smem_layout_a(
+ tiled_mma, self.mma_tiler, a_dtype, self.num_a_stage
+ )
+ # Swizzle o [(tileN, tileK), loopN, loopK, stage]
+ b_smem_layout_staged = sm100_utils.make_smem_layout_b(
+ tiled_mma, self.mma_tiler, b_dtype, self.num_b_stage
+ )
+
+ # TMA loading
+ tma_load_op = cpasync.CopyBulkTensorTileG2SOp(self.cta_group)
+ tma_store_op = cpasync.CopyBulkTensorTileS2GOp()
+
+ # Swizzle o [(tileM, tileK), loopM, loopK]
+ a_smem_layout = cute.select(a_smem_layout_staged, mode=[0, 1, 2])
+ # create tma copy atom for hidden,
+ # and the corresponding tma descriptor tensor
+ tma_atom_a, tma_desc_a = cute.nvgpu.make_tiled_tma_atom_A(
+ tma_load_op,
+ hidden, # gmem_tensor
+ a_smem_layout, # SMEM layout
+ self.mma_tiler, # MMA tiler
+ tiled_mma, # TiledMMA
+ self.cluster_layout_vmnk.shape, # cluster_shape_vmnk
+ )
+ # Swizzle o [(tileN, tileK), loopN, loopK]
+ b_smem_layout = cute.select(b_smem_layout_staged, mode=[0, 1, 2])
+ tma_atom_b, tma_desc_b = cute.nvgpu.make_tiled_tma_atom_B(
+ tma_load_op,
+ weight, # gmem_tensor
+ b_smem_layout, # SMEM layout
+ self.mma_tiler, # MMA tiler
+ tiled_mma, # TiledMMA
+ self.cluster_layout_vmnk.shape, # cluster_shape_vmnk
+ )
+ a_copy_size = cute.size_in_bytes(a_dtype, a_smem_layout)
+ b_copy_size = cute.size_in_bytes(b_dtype, b_smem_layout)
+ self.tma_copy_a_bytes = a_copy_size
+ self.tma_copy_b_bytes = b_copy_size
+
+ assert self.num_a_stage == self.num_b_stage
+
+ @cute.struct
+ class SharedStorage:
+ """
+ The shared storage for the forward kernel.
+ """
+
+ # pipeline barriers, 2 = producer + consumer
+ load_ab_mbar_ptr: cute.struct.MemRange[cutlass.Int64, self.num_a_stage * 2]
+ mma_mbar_ptr: cute.struct.MemRange[cutlass.Int64, self.num_acc_stage * 2]
+ tmem_dealloc_mbar_ptr: cute.struct.MemRange[cutlass.Int64, 1]
+ # tmem holding buffer
+ tmem_holding_buf: cutlass.Int32
+ # SMEM tensors
+ sA: cute.struct.Align[
+ cute.struct.MemRange[a_dtype, cute.cosize(a_smem_layout_staged)],
+ self.buffer_align_bytes,
+ ]
+ sB: cute.struct.Align[
+ cute.struct.MemRange[b_dtype, cute.cosize(b_smem_layout_staged)],
+ self.buffer_align_bytes,
+ ]
+
+ self.shared_storage = SharedStorage
+
+ # launch kernel
+ self.kernel(
+ tiled_mma,
+ tma_atom_a,
+ tma_desc_a,
+ tma_atom_b,
+ tma_desc_b,
+ labels,
+ _max,
+ _accu,
+ _logprobs,
+ a_smem_layout_staged,
+ b_smem_layout_staged,
+ self.cluster_layout_vmnk,
+ problem_mnk,
+ ignore_index,
+ rank,
+ ).launch(
+ grid=grid,
+ block=[self.threads_per_cta, 1, 1],
+ cluster=self.cluster_shape_mnk,
+ stream=stream,
+ )
+ return None
+
+except ImportError:
+ logging.warning("Cutlass or CUDA Python bindings not found. FwdMainLoop will not be available.")
diff --git a/megatron/core/fusions/linear_cross_entropy/blackwell/triton.py b/megatron/core/fusions/linear_cross_entropy/blackwell/triton.py
new file mode 100644
index 00000000000..e025cc046f4
--- /dev/null
+++ b/megatron/core/fusions/linear_cross_entropy/blackwell/triton.py
@@ -0,0 +1,248 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+
+import triton # type: ignore
+import triton.language as tl # type: ignore
+
+# NOTE: tl.pointer_type() is not available in Triton 3.3.0
+
+
+@triton.autotune(
+ configs=[
+ triton.Config({"BLOCK_SIZE_M": 1024}, num_stages=3, num_warps=32),
+ triton.Config({"BLOCK_SIZE_M": 2048}, num_stages=3, num_warps=32),
+ ],
+ key=["num_tokens"],
+)
+@triton.jit
+def get_num_valid_tokens(
+ num_tokens: tl.int64,
+ ignore_index: tl.int64,
+ labels_ptr, #: tl.pointer_type(tl.int64),
+ stride_labels: tl.int64,
+ num_valid_tokens_ptr, #: tl.pointer_type(tl.int64),
+ BLOCK_SIZE_M: tl.constexpr,
+):
+ """
+ Calculate the number of valid tokens in the labels tensor.
+ """
+ num_pid_m: tl.int64 = tl.cdiv(num_tokens, BLOCK_SIZE_M)
+
+ num_valid_tokens: tl.int64 = tl.zeros((), dtype=tl.int64)
+ for m in range(0, num_pid_m):
+ offs_am = m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)
+
+ labels = tl.load(
+ labels_ptr + offs_am * stride_labels, mask=offs_am < num_tokens, other=ignore_index
+ )
+
+ valid_labels_mask = labels != ignore_index
+ num_valid_tokens += (tl.sum(valid_labels_mask.to(tl.int32), axis=0)).to(tl.int64)
+ tl.store(num_valid_tokens_ptr, num_valid_tokens)
+
+
+@triton.autotune(
+ configs=[triton.Config({"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64})],
+ key=["num_tokens", "num_splits"],
+)
+@triton.jit
+def forward_dp_epilogue(
+ num_tokens: tl.int64,
+ num_splits: tl.int64, # TODO: maybe this could be a constexpr
+ ignore_index: tl.int64,
+ labels_ptr, #: tl.pointer_type(tl.int64),
+ stride_labels: tl.int64,
+ num_valid_tokens_ptr, #: tl.pointer_type(tl.int64),
+ max_ptr, #: tl.pointer_type(tl.float32),
+ stride_max_m: tl.int64,
+ stride_max_n: tl.int64,
+ accu_ptr, #: tl.pointer_type(tl.float32),
+ stride_accu_m: tl.int64,
+ stride_accu_n: tl.int64,
+ global_max_ptr, #: tl.pointer_type(tl.float32),
+ stride_global_max: tl.int64,
+ global_accu_ptr, #: tl.pointer_type(tl.float32),
+ stride_global_accu: tl.int64,
+ global_logprobs_ptr, #: tl.pointer_type(tl.float32),
+ stride_global_logprobs: tl.int64,
+ global_logprobs_scalar_ptr, #: tl.pointer_type(tl.float32),
+ REDUCTION: tl.constexpr,
+ BLOCK_SIZE_M: tl.constexpr,
+ BLOCK_SIZE_N: tl.constexpr,
+):
+ """
+ forward epilogue in dp
+ """
+ pid_m = tl.program_id(axis=0)
+
+ offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)
+ global_max = tl.zeros((BLOCK_SIZE_M,), dtype=tl.float32)
+ global_accu = tl.zeros((BLOCK_SIZE_M,), dtype=tl.float32)
+
+ for pid_n in range(0, tl.cdiv(num_splits, BLOCK_SIZE_N)):
+ offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)
+
+ _max = tl.load(
+ max_ptr + offs_m[:, None] * stride_max_m + offs_n[None, :] * stride_max_n,
+ mask=(offs_m[:, None] < num_tokens) & (offs_n[None, :] < num_splits),
+ other=0.0,
+ )
+ _accu = tl.load(
+ accu_ptr + offs_m[:, None] * stride_accu_m + offs_n[None, :] * stride_accu_n,
+ mask=(offs_m[:, None] < num_tokens) & (offs_n[None, :] < num_splits),
+ other=0.0,
+ )
+
+ # local reduction
+ _max_old = global_max
+ _local_max = tl.max(_max, axis=1, return_indices=False)
+ global_max = tl.maximum(global_max, _local_max)
+
+ _scale = tl.exp(_max - global_max[:, None])
+ _coeff = tl.exp(_max_old - global_max)
+ global_accu = _coeff * global_accu + tl.sum(_scale * _accu, axis=1)
+
+ # store maximum
+ tl.store(global_max_ptr + offs_m * stride_global_max, global_max, mask=offs_m < num_tokens)
+ # store accumulate
+ tl.store(global_accu_ptr + offs_m * stride_global_accu, global_accu, mask=offs_m < num_tokens)
+ # update logprobs
+ labels = tl.load(
+ labels_ptr + offs_m * stride_labels, mask=offs_m < num_tokens, other=ignore_index
+ )
+ global_logprobs_ptrs = global_logprobs_ptr + offs_m * stride_global_logprobs
+ global_logprobs = tl.load(global_logprobs_ptrs, mask=offs_m < num_tokens)
+ global_logprobs = global_max + tl.log(global_accu) - global_logprobs
+ label_mask = labels != ignore_index
+ global_logprobs = tl.where(label_mask, global_logprobs, 0.0)
+
+ if REDUCTION == 0: # no-reduction
+ tl.store(global_logprobs_ptrs, global_logprobs, mask=offs_m < num_tokens)
+ elif REDUCTION == 1: # sum
+ global_logprobs_scalar = tl.sum(global_logprobs, axis=0)
+ tl.atomic_add(global_logprobs_scalar_ptr, global_logprobs_scalar)
+ elif REDUCTION == 2: # mean
+ num_valid_tokens = tl.load(num_valid_tokens_ptr)
+ global_logprobs_scalar = tl.fdiv(
+ tl.sum(global_logprobs, axis=0), num_valid_tokens.to(tl.float32)
+ )
+ tl.atomic_add(global_logprobs_scalar_ptr, global_logprobs_scalar)
+
+
+@triton.autotune(
+ configs=[triton.Config({"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64})],
+ key=["num_tokens", "num_splits"],
+)
+@triton.jit
+def forward_tp_epilogue(
+ num_tokens: tl.int64,
+ num_splits: tl.int64,
+ reduced_max_ptr, #: tl.pointer_type(tl.float32),
+ stride_reduced_max_m: tl.int64,
+ stride_reduced_max_n: tl.int64,
+ original_max_ptr, #: tl.pointer_type(tl.float32),
+ stride_original_max_m: tl.int64,
+ stride_original_max_n: tl.int64,
+ accu_ptr, #: tl.pointer_type(tl.float32),
+ stride_accu_m: tl.int64,
+ stride_accu_n: tl.int64,
+ global_max_ptr, #: tl.pointer_type(tl.float32),
+ stride_global_max: tl.int64,
+ global_accu_ptr, #: tl.pointer_type(tl.float32),
+ stride_global_accu: tl.int64,
+ BLOCK_SIZE_M: tl.constexpr,
+ BLOCK_SIZE_N: tl.constexpr,
+):
+ """
+ forward epilogue in tp
+ """
+ pid_m = tl.program_id(axis=0)
+
+ offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)
+
+ global_max = tl.zeros((BLOCK_SIZE_M,), dtype=tl.float32)
+ global_accu = tl.zeros((BLOCK_SIZE_M,), dtype=tl.float32)
+
+ for pid_n in range(0, tl.cdiv(num_splits, BLOCK_SIZE_N)):
+ offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)
+
+ _reduced_max = tl.load(
+ reduced_max_ptr
+ + offs_m[:, None] * stride_reduced_max_m
+ + offs_n[None, :] * stride_reduced_max_n,
+ mask=(offs_m[:, None] < num_tokens) & (offs_n[None, :] < num_splits),
+ other=0.0,
+ )
+ _original_max = tl.load(
+ original_max_ptr
+ + offs_m[:, None] * stride_original_max_m
+ + offs_n[None, :] * stride_original_max_n,
+ mask=(offs_m[:, None] < num_tokens) & (offs_n[None, :] < num_splits),
+ other=0.0,
+ )
+ _accu = tl.load(
+ accu_ptr + offs_m[:, None] * stride_accu_m + offs_n[None, :] * stride_accu_n,
+ mask=(offs_m[:, None] < num_tokens) & (offs_n[None, :] < num_splits),
+ other=0.0,
+ )
+
+ # local reduction
+ _max_old = global_max
+ _local_max = tl.max(_reduced_max, axis=1)
+ global_max = tl.maximum(global_max, _local_max)
+
+ # update accumulate
+ _coeff = tl.exp(_max_old - global_max)
+ _scale = tl.exp(_original_max - global_max[:, None])
+ global_accu = _coeff * global_accu + tl.sum(_scale * _accu, axis=1)
+
+ # store
+ tl.store(global_max_ptr + offs_m * stride_global_max, global_max, mask=offs_m < num_tokens)
+ tl.store(global_accu_ptr + offs_m * stride_global_accu, global_accu, mask=offs_m < num_tokens)
+
+
+@triton.autotune(configs=[triton.Config({"BLOCK_SIZE_M": 16})], key=["num_tokens"])
+@triton.jit
+def forward_tp_epilogue_update_logprobs(
+ num_tokens: tl.int64,
+ ignore_index: tl.int64,
+ num_valid_tokens_ptr, #: tl.pointer_type(tl.int64),
+ labels_ptr, #: tl.pointer_type(tl.int64),
+ stride_labels: tl.int64,
+ logprobs_ptr, #: tl.pointer_type(tl.float32),
+ stride_logprobs: tl.int64,
+ maximum_ptr, #: tl.pointer_type(tl.float32),
+ stride_maximum: tl.int64,
+ accumulate_ptr, #: tl.pointer_type(tl.float32),
+ stride_accumulate: tl.int64,
+ logprobs_scalar_ptr, #: tl.pointer_type(tl.float32),
+ REDUCTION: tl.constexpr,
+ BLOCK_SIZE_M: tl.constexpr,
+):
+ """
+ update logprobs in tp
+ """
+ pid_m = tl.program_id(axis=0)
+
+ offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)
+
+ logprobs = tl.load(logprobs_ptr + offs_m * stride_logprobs, mask=offs_m < num_tokens)
+ maximum = tl.load(maximum_ptr + offs_m * stride_maximum, mask=offs_m < num_tokens)
+ accumulate = tl.load(accumulate_ptr + offs_m * stride_accumulate, mask=offs_m < num_tokens)
+
+ labels = tl.load(
+ labels_ptr + offs_m * stride_labels, mask=offs_m < num_tokens, other=ignore_index
+ )
+ label_mask = labels != ignore_index
+
+ logprobs = maximum + tl.log(accumulate) - logprobs
+ logprobs = tl.where(label_mask, logprobs, 0.0)
+
+ if REDUCTION == 0: # no-reduction
+ tl.store(logprobs_ptr + offs_m * stride_logprobs, logprobs, mask=offs_m < num_tokens)
+ elif REDUCTION == 1: # sum
+ logprobs_scalar = tl.sum(logprobs, axis=0)
+ tl.atomic_add(logprobs_scalar_ptr, logprobs_scalar)
+ elif REDUCTION == 2: # mean
+ num_valid_tokens = tl.load(num_valid_tokens_ptr)
+ logprobs_scalar = tl.fdiv(tl.sum(logprobs, axis=0), num_valid_tokens.to(tl.float32))
+ tl.atomic_add(logprobs_scalar_ptr, logprobs_scalar)
diff --git a/megatron/core/fusions/linear_cross_entropy/utils.py b/megatron/core/fusions/linear_cross_entropy/utils.py
new file mode 100644
index 00000000000..d077d64ab17
--- /dev/null
+++ b/megatron/core/fusions/linear_cross_entropy/utils.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+
+import typing
+from enum import Enum
+
+
+class EntropyReductionEnum(Enum):
+ """
+ Enum for the reduction method of cross entropy.
+ """
+
+ kNone = 0
+ kSum = 1
+ kMean = 2
+
+
+def str_to_reduction_enum(reduction: typing.Literal["none", "sum", "mean"]) -> EntropyReductionEnum:
+ """
+ str -> EntropyReductionEnum
+ """
+ _enum = EntropyReductionEnum.kNone
+ if reduction == "none":
+ _enum = EntropyReductionEnum.kNone
+ elif reduction == "sum":
+ _enum = EntropyReductionEnum.kSum
+ elif reduction == "mean":
+ _enum = EntropyReductionEnum.kMean
+ else:
+ raise ValueError(f"Invalid reduction: {reduction}")
+ return _enum
+
+
+class BackwardMethodEnum(Enum):
+ """
+ Enum for the backward method of linear cross entropy.
+ """
+
+ # two separate kernels for d_hidden and d_weight, respectively
+ kTwoKernels = 0
+ # calculate partial d_logits along its N dimension
+ kDlogitsSplitN = 1
+ # fuse d_hidden and d_weight into a single kernel
+ kFused = 2
diff --git a/megatron/core/inference/contexts/dynamic_context.py b/megatron/core/inference/contexts/dynamic_context.py
index 5a1dbb5cf97..9370969a9bc 100644
--- a/megatron/core/inference/contexts/dynamic_context.py
+++ b/megatron/core/inference/contexts/dynamic_context.py
@@ -1157,6 +1157,7 @@ def apply_rotary_emb_query(
cu_seqlens=cu_seqlens_q,
cp_group=cp_group,
mscale=mscale,
+ mla_rotary_interleaved=config.multi_latent_attention,
)
return query
@@ -1191,11 +1192,21 @@ def apply_rotary_emb_key(
f"paused_request_count={self.paused_request_count}"
)
key = apply_rotary_pos_emb(
- t=key[:n], freqs=key_emb[:n], config=config, cp_group=cp_group, mscale=mscale
+ t=key[:n],
+ freqs=key_emb[:n],
+ config=config,
+ cp_group=cp_group,
+ mscale=mscale,
+ mla_rotary_interleaved=config.multi_latent_attention,
)
else:
key[:n] = apply_rotary_pos_emb(
- t=key[:n], freqs=key_emb[:n], config=config, cp_group=cp_group, mscale=mscale
+ t=key[:n],
+ freqs=key_emb[:n],
+ config=config,
+ cp_group=cp_group,
+ mscale=mscale,
+ mla_rotary_interleaved=config.multi_latent_attention,
)
return key
diff --git a/megatron/core/model_parallel_config.py b/megatron/core/model_parallel_config.py
index d5cd5397d56..681cd526b4e 100644
--- a/megatron/core/model_parallel_config.py
+++ b/megatron/core/model_parallel_config.py
@@ -6,8 +6,11 @@
import torch
+from megatron.core.utils import experimental_api
+
@dataclass
+@experimental_api
class ModelParallelConfig:
"""Base configuration for Megatron Core
@@ -59,14 +62,23 @@ class ModelParallelConfig:
can handle without overflowing the memory. Typically, a good starting point is to set this
to maximum sequence length / context parallel size.
This is used to calculate the number and length of sub-samples assigned to
- each rank when using hybrid_context_parallel.
+ each rank when sequence_packing_scheduler is not None.
"""
- hybrid_context_parallel: bool = False
+ dynamic_context_parallel: bool = False
"""
- If true, enables hybrid context parallel. This is used to balance the workload of
+ If true, enables dynamic context parallel. This is used to balance the workload of
each CP rank when we use packed samples with variable sequence lengths.
- Please set max_seqlen_per_dp_cp_rank when using hybrid_context_parallel.
+ Please set max_seqlen_per_dp_cp_rank when using dynamic_context_parallel.
+ """
+
+ hybrid_context_parallel: bool = False
+ """Deprecated. Use ``dynamic_context_parallel`` instead."""
+
+ sequence_packing_scheduler: Optional[Literal['dp_balanced']] = None
+ """
+ Scheduler for sequence packing and dynamic context parallel.
+ dp_balanced: DP-balanced scheduler for sequence packing.
"""
expert_model_parallel_size: int = 1
@@ -235,9 +247,14 @@ class ModelParallelConfig:
Defaults to False.
"""
- cross_entropy_fusion_impl: Literal['native', 'te'] = 'native'
- """If 'native', MCore based CE loss fusion is used, if 'te', Parallel CE loss
- from Transformer Engine library is used. Defaults to 'native'.
+ cross_entropy_fusion_impl: Literal['native', 'te', 'linear'] = 'native'
+ """
+ Specifies the implementation of cross-entropy loss fusion.
+
+ Options:
+ - 'native': Uses MCore-based cross-entropy loss fusion (default).
+ - 'te': Uses the parallel cross-entropy loss implementation from the Transformer Engine library.
+ - 'linear': Uses a linear-cross-entropy fusion approach.
"""
tp_comm_overlap_disable_qkv: bool = False
@@ -261,6 +278,15 @@ class ModelParallelConfig:
delay_wgrad_compute: bool = False
"""Delay the weight gradient computation to improve batch-level communication overlapping"""
+ overlap_dispatch_backward_with_experts_wgrad: bool = False
+ """Delay the weight gradient computation for TE Grouped GEMM MoE experts.
+ When enabled with FSDP, the expert weight gradients are computed on a separate
+ CUDA stream after the data gradients finish, allowing overlap of wgrad compute
+ with EP A2A communication. The FSDP gradient reduce-scatter for
+ expert parameters is deferred until the delayed wgrad computation completes.
+ This requires transformer_engine with GroupedLinear support (TE >= 2.3.0).
+ """
+
ep_overlap_early_attn_memory_release: bool = False
"""Enable early memory release of attention activations during EP overlap.
EP overlap can increase peak memory usage when the overlapped forward module allocates
@@ -403,6 +429,19 @@ def __post_init__(self):
See https://docs.python.org/3/library/dataclasses.html#post-init-processing for more
details.
"""
+ if self.hybrid_context_parallel:
+ warnings.warn(
+ "hybrid_context_parallel is deprecated and will be removed in a future release. "
+ "Use dynamic_context_parallel instead.",
+ DeprecationWarning,
+ )
+ if self.dynamic_context_parallel:
+ raise ValueError(
+ "Cannot set both hybrid_context_parallel and dynamic_context_parallel. "
+ "Please use dynamic_context_parallel only."
+ )
+ self.dynamic_context_parallel = True
+
if self.sequence_parallel:
if self.tensor_model_parallel_size <= 1:
raise ValueError("Cannot use sequence parallelism without tensor parallelism")
diff --git a/megatron/core/models/common/embeddings/rope_utils.py b/megatron/core/models/common/embeddings/rope_utils.py
index 2fd19194813..b990615da29 100644
--- a/megatron/core/models/common/embeddings/rope_utils.py
+++ b/megatron/core/models/common/embeddings/rope_utils.py
@@ -93,8 +93,9 @@ def _apply_rotary_pos_emb_bshd(
t: Tensor,
freqs: Tensor,
rotary_interleaved: bool = False,
- multi_latent_attention: bool = False,
+ mla_rotary_interleaved: bool = False,
mscale: float = 1.0,
+ multi_latent_attention: Optional[bool] = None,
) -> Tensor:
"""Apply rotary positional embedding to input tensor T.
@@ -103,16 +104,26 @@ def _apply_rotary_pos_emb_bshd(
Args:
t (Tensor): Input tensor T is of shape [seq_length, ... , dim]
freqs (Tensor): Rotary Positional embedding tensor freq is of shape [seq_length, ..., dim]
+ rotary_interleaved (bool): Whether to apply interleaving in the rotate half function.
+ mla_rotary_interleaved (bool): Whether to apply MLA-style interleaving for RoPE.
+ mscale (float): The scaling factor for the RoPE.
Returns:
Tensor: The input tensor after applying RoPE
"""
+ if multi_latent_attention is not None:
+ warnings.warn(
+ "multi_latent_attention is deprecated. Please use mla_rotary_interleaved instead.",
+ DeprecationWarning,
+ )
+ mla_rotary_interleaved = multi_latent_attention
+
rot_dim = freqs.shape[-1]
# ideally t_pass is empty so rotary pos embedding is applied to all tensor t
t, t_pass = t[..., :rot_dim], t[..., rot_dim:]
- if multi_latent_attention:
+ if mla_rotary_interleaved:
x1 = t[..., 0::2]
x2 = t[..., 1::2]
t = torch.cat((x1, x2), dim=-1)
@@ -180,9 +191,10 @@ def _apply_rotary_pos_emb_thd(
cu_seqlens: Tensor,
freqs: Tensor,
rotary_interleaved: bool = False,
- multi_latent_attention: bool = False,
+ mla_rotary_interleaved: bool = False,
mscale: float = 1.0,
cp_group: torch.distributed.ProcessGroup = None,
+ multi_latent_attention: Optional[bool] = None,
) -> Tensor:
"""A baseline implementation of applying RoPE for `thd` format.
@@ -196,6 +208,12 @@ def _apply_rotary_pos_emb_thd(
Returns:
Tensor: Shape [t, h, d]. The input tensor after applying RoPE.
"""
+ if multi_latent_attention is not None:
+ warnings.warn(
+ "multi_latent_attention is deprecated. Please use mla_rotary_interleaved instead.",
+ DeprecationWarning,
+ )
+ mla_rotary_interleaved = multi_latent_attention
if cp_group is None:
raise ValueError("cp_group must be provided for THD format RoPE")
@@ -226,7 +244,7 @@ def _apply_rotary_pos_emb_thd(
t.unsqueeze(1),
freqs_packed,
rotary_interleaved=rotary_interleaved,
- multi_latent_attention=multi_latent_attention,
+ mla_rotary_interleaved=mla_rotary_interleaved,
mscale=mscale,
).squeeze(1)
else:
@@ -242,7 +260,7 @@ def _apply_rotary_pos_emb_thd(
t.unsqueeze(1),
freqs_packed,
rotary_interleaved=rotary_interleaved,
- multi_latent_attention=multi_latent_attention,
+ mla_rotary_interleaved=mla_rotary_interleaved,
mscale=mscale,
).squeeze(1)
@@ -254,6 +272,7 @@ def apply_rotary_pos_emb(
cu_seqlens: Optional[Tensor] = None,
mscale: float = 1.0,
cp_group: torch.distributed.ProcessGroup = None,
+ mla_rotary_interleaved: bool = False,
):
"""
Reroute to the appropriate apply_rotary_pos_emb function depending on
@@ -282,6 +301,12 @@ def apply_rotary_pos_emb(
"Using unfused implementation."
)
use_unfused = True
+ if mla_rotary_interleaved:
+ warnings.warn(
+ "apply_rope_fusion does not support MLA-style interleaving in RoPE."
+ "Using unfused implementation."
+ )
+ use_unfused = True
if not use_unfused:
assert fused_apply_rotary_pos_emb is not None, "apply_rope_fusion is not available."
return fused_apply_rotary_pos_emb(t, freqs, interleaved=config.rotary_interleaved)
@@ -301,7 +326,7 @@ def apply_rotary_pos_emb(
t,
freqs,
rotary_interleaved=config.rotary_interleaved,
- multi_latent_attention=config.multi_latent_attention,
+ mla_rotary_interleaved=mla_rotary_interleaved,
mscale=mscale,
)
else:
@@ -310,7 +335,7 @@ def apply_rotary_pos_emb(
cu_seqlens,
freqs,
rotary_interleaved=config.rotary_interleaved,
- multi_latent_attention=config.multi_latent_attention,
+ mla_rotary_interleaved=mla_rotary_interleaved,
mscale=mscale,
cp_group=cp_group,
)
@@ -339,7 +364,7 @@ def apply_rotary_pos_emb_with_cos_sin(
t,
freqs,
rotary_interleaved=rotary_interleaved,
- multi_latent_attention=False,
+ mla_rotary_interleaved=False,
mscale=1.0,
)
else:
diff --git a/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py b/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py
index 166ef9b41e7..bc5a9c5fa3f 100644
--- a/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py
+++ b/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py
@@ -186,13 +186,13 @@ def forward(
emb = get_pos_emb_on_this_cp_rank(emb, 0, cp_group)
return emb, _mscale
- def _set_cos_sin_cache(self, seq_len, offset, dtype, packed_seq=False):
+ def _set_cos_sin_cache(self, seq_len, offset, dtype, packed_seq=False, cp_group=None):
self.max_seq_len_cached = seq_len
self.offset_cached = offset
self.dtype_cached = dtype
self.packed_seq_cached = packed_seq
- emb, _mscale = self.forward(seq_len, offset, packed_seq)
+ emb, _mscale = self.forward(seq_len, offset, packed_seq=packed_seq, cp_group=cp_group)
self.register_buffer(
"cos_cached", (emb.cos() * _mscale).to(dtype).contiguous(), persistent=False
)
@@ -201,7 +201,7 @@ def _set_cos_sin_cache(self, seq_len, offset, dtype, packed_seq=False):
)
def get_cached_cos_sin(
- self, seq_len, offset=0, dtype=torch.get_default_dtype(), packed_seq=False
+ self, seq_len, offset=0, dtype=torch.get_default_dtype(), packed_seq=False, cp_group=None
):
"""Get cached cos and sin values."""
if (
@@ -210,7 +210,7 @@ def get_cached_cos_sin(
or dtype != self.dtype_cached
or packed_seq != self.packed_seq_cached
):
- self._set_cos_sin_cache(seq_len, offset, dtype, packed_seq)
+ self._set_cos_sin_cache(seq_len, offset, dtype, packed_seq, cp_group)
return (self.cos_cached[:seq_len, ...], self.sin_cached[:seq_len, ...])
diff --git a/megatron/core/models/common/language_module/language_module.py b/megatron/core/models/common/language_module/language_module.py
index 3c6b7c4ab8d..6d6d4737c3d 100644
--- a/megatron/core/models/common/language_module/language_module.py
+++ b/megatron/core/models/common/language_module/language_module.py
@@ -384,7 +384,7 @@ def tie_embeddings_and_output_weights_state_dict(
sharded_state_dict: ShardedStateDict,
output_layer_weight_key: str,
first_stage_word_emb_key: str,
- metadata: dict = {},
+ metadata: Optional[dict] = None,
) -> None:
"""Ties the embedding and output weights in a given sharded state dict.
@@ -394,9 +394,11 @@ def tie_embeddings_and_output_weights_state_dict(
This entry will be replaced with a tied version
first_stage_word_emb_key (str): this must be the same as the
ShardedTensor.key of the first stage word embeddings.
+ metadata (Optional[Dict]): metadata controlling sharded state dict creation.
Returns: None, acts in-place
"""
+ metadata = ensure_metadata_has_dp_cp_group(metadata)
if not self.post_process:
# No output layer
assert output_layer_weight_key not in sharded_state_dict, sharded_state_dict.keys()
diff --git a/megatron/core/models/common/model_chunk_schedule_plan.py b/megatron/core/models/common/model_chunk_schedule_plan.py
index 2b9d72d5f35..2a7476228ae 100644
--- a/megatron/core/models/common/model_chunk_schedule_plan.py
+++ b/megatron/core/models/common/model_chunk_schedule_plan.py
@@ -14,6 +14,7 @@
get_comm_stream,
get_comp_stream,
)
+from megatron.core.transformer.enums import CudaGraphScope
class ModelChunkState:
@@ -172,6 +173,11 @@ def create_node(stream, module, name):
else:
self.mtp_post_process = NoopScheduleNode()
+ # mlp and combine may receive dgrad from attn, which is managed by cuda graph.
+ if CudaGraphScope.attn in self.config.cuda_graph_scope:
+ self.mlp.manual_grads_release = False
+ self.moe_combine.manual_grads_release = False
+
def get_fp8_context(self):
"""
Get the fp8 context for the transformer layer.
diff --git a/megatron/core/models/gpt/experimental_attention_variant_module_specs.py b/megatron/core/models/gpt/experimental_attention_variant_module_specs.py
index 6608073136c..fa4e15db856 100644
--- a/megatron/core/models/gpt/experimental_attention_variant_module_specs.py
+++ b/megatron/core/models/gpt/experimental_attention_variant_module_specs.py
@@ -12,6 +12,7 @@
DSAttention,
DSAttentionSubmodules,
)
+from megatron.core.transformer.hyper_connection import HyperConnectionModule
from megatron.core.transformer.identity_op import IdentityOp
from megatron.core.transformer.multi_latent_attention import (
MLASelfAttention,
@@ -24,6 +25,7 @@
)
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.transformer_layer import (
+ HyperConnectionTransformerLayer,
TransformerLayer,
TransformerLayerSubmodules,
get_transformer_layer_offset,
@@ -81,17 +83,6 @@ def get_dsa_module_spec_for_backend(
assert config.multi_latent_attention, "Currently only MLA supports sparse attention."
assert config.qk_l2_norm is False, "qk_l2_norm is not supported with MLA."
- linear_q_up_proj = (
- backend.column_parallel_layer_norm_linear()
- if config.qk_layernorm
- else backend.column_parallel_linear()
- )
- linear_kv_up_proj = (
- backend.column_parallel_layer_norm_linear()
- if config.qk_layernorm
- else backend.column_parallel_linear()
- )
-
# Because TransformerEngine does not support sparse attention yet, we use local
# implementation whether the backend is TransformerEngine or not.
core_attention = ModuleSpec(
@@ -109,20 +100,29 @@ def get_dsa_module_spec_for_backend(
),
)
+ # Adjust for RMS norm.
+ rms_norm = config.normalization == "RMSNorm"
+ # DSA indexer requires normalized q as input, so here we cannot fuse qk layernorm
+ # with linear projection and have to use unfused qk layernorm.
+ qk_norm = (
+ backend.layer_norm(rms_norm=rms_norm, for_qk=True) if config.qk_layernorm else IdentityOp
+ )
+
attention = ModuleSpec(
module=MLASelfAttention,
params={"attn_mask_type": AttnMaskType.causal},
submodules=MLASelfAttentionSubmodules(
linear_q_proj=backend.column_parallel_linear(),
linear_q_down_proj=backend.linear(),
- linear_q_up_proj=linear_q_up_proj,
+ linear_q_up_proj=backend.column_parallel_linear(),
linear_kv_down_proj=backend.linear(),
- linear_kv_up_proj=linear_kv_up_proj,
+ linear_kv_up_proj=backend.column_parallel_linear(),
core_attention=core_attention,
linear_proj=backend.row_parallel_linear(),
- q_layernorm=IdentityOp,
- kv_layernorm=IdentityOp,
+ q_layernorm=qk_norm,
+ kv_layernorm=qk_norm,
),
+ metainfo={"fuse_input_layernorm": False},
)
return attention
@@ -138,6 +138,8 @@ def get_experimental_attention_variant_module_spec(
if config.experimental_attention_variant == "gated_delta_net":
return get_gated_delta_net_module_spec(config=config, backend=backend)
+ elif config.experimental_attention_variant == "dsa":
+ return get_dsa_module_spec_for_backend(config=config, backend=backend)
else:
raise ValueError(
f"Invalid experimental attention variant: {config.experimental_attention_variant}"
@@ -149,12 +151,12 @@ def get_experimental_attention_variant_module_spec(
##########
-def get_transformer_block_with_experimental_attention_variant_spec(
- config: TransformerConfig, vp_stage: Optional[int] = None, pp_rank: Optional[int] = None
-) -> TransformerBlockSubmodules:
- """Build transformer block spec with experimental attention variants (e.g., linear attention).
+def get_transformer_layer_with_experimental_attention_variant_spec(
+ config: TransformerConfig, backend: BackendSpecProvider = None
+) -> List[ModuleSpec]:
+ """Build transformer layer specs with experimental attention variants (e.g., linear attention).
- This function constructs a heterogeneous transformer block that supports mixing different
+ This function is for constructing a heterogeneous transformer that supports mixing different
attention mechanisms (experimental vs standard) and MLP types (MoE vs dense) across layers.
**Note that, this API is a experimental API in the short term, and might be deprecated in the
future. In the long run, we will move to a new design that better support hybrid models.**
@@ -170,22 +172,19 @@ def get_transformer_block_with_experimental_attention_variant_spec(
2. Per-Layer Spec Construction: Iterates through layers, constructing transformer
layer specs based on attention and MLP patterns.
- 3. Pipeline Slicing: Extracts layer specs for the current pipeline stage.
-
Args:
config: Transformer configuration containing model hyperparameters and feature flags.
- vp_stage: Virtual pipeline stage index for interleaved pipeline parallelism.
- pp_rank: Pipeline model parallel rank.
Returns:
- TransformerBlockSubmodules containing per-layer specs and final layer norm.
+ List[ModuleSpec] containing per-layer specs.
Note:
Currently only supports transformer_engine backend. Kitchen backend can be used as a
wrapper with TE fallback for unsupported operations.
"""
- backend = _get_backend_spec_provider(config=config)
+ if backend is None:
+ backend = _get_backend_spec_provider(config=config)
# Get attention patterns and specs
experimental_attention_pattern = [0] * config.num_layers
@@ -224,6 +223,10 @@ def get_transformer_block_with_experimental_attention_variant_spec(
# Get GPT decoder block layer specs
rms_norm = config.normalization == "RMSNorm"
+ enable_hc = config.enable_hyper_connections
+ hc_module = HyperConnectionModule if enable_hc else IdentityOp
+ layer_module = HyperConnectionTransformerLayer if enable_hc else TransformerLayer
+
layer_specs = []
for layer_number in range(config.num_layers):
attention = (
@@ -245,18 +248,56 @@ def get_transformer_block_with_experimental_attention_variant_spec(
layer_specs.append(
ModuleSpec(
- module=TransformerLayer,
+ module=layer_module,
submodules=TransformerLayerSubmodules(
input_layernorm=input_layernorm,
self_attention=attention,
self_attn_bda=get_bias_dropout_add,
+ self_attention_hyper_connection=hc_module,
pre_mlp_layernorm=pre_mlp_layernorm,
mlp=mlp,
mlp_bda=get_bias_dropout_add,
+ mlp_hyper_connection=hc_module,
),
)
)
+ return layer_specs
+
+
+def get_transformer_block_with_experimental_attention_variant_spec(
+ config: TransformerConfig, vp_stage: Optional[int] = None, pp_rank: Optional[int] = None
+) -> TransformerBlockSubmodules:
+ """Build transformer block spec with experimental attention variants (e.g., linear attention).
+
+ This function constructs a heterogeneous transformer block that supports mixing different
+ attention mechanisms (experimental vs standard) and MLP types (MoE vs dense) across layers.
+ **Note that, this API is an experimental API in the short term, and might be deprecated in the
+ future. In the long run, we will move to a new design that better supports hybrid models.**
+
+ Constructing transformer layer specs by
+ `get_transformer_layer_with_experimental_attention_variant_spec` and then slicing the
+ layer specs to only include the layers that are built in this pipeline stage.
+
+ Args:
+ config: Transformer configuration containing model hyperparameters and feature flags.
+ vp_stage: Virtual pipeline stage index for interleaved pipeline parallelism.
+ pp_rank: Pipeline model parallel rank.
+
+ Returns:
+ TransformerBlockSubmodules containing per-layer specs and final layer norm.
+
+ Note:
+ Currently only supports transformer_engine backend. Kitchen backend can be used as a
+ wrapper with TE fallback for unsupported operations.
+ """
+
+ backend = _get_backend_spec_provider(config=config)
+
+ layer_specs = get_transformer_layer_with_experimental_attention_variant_spec(
+ config=config, backend=backend
+ )
+
# Slice the layer specs to only include the layers that are built in this pipeline stage.
if config.pipeline_model_parallel_layout is not None:
local_layer_ids = config.pipeline_model_parallel_layout.get_layer_id_list(
@@ -270,6 +311,7 @@ def get_transformer_block_with_experimental_attention_variant_spec(
layer_specs = [layer_specs[layer_id] for layer_id in local_layer_ids]
# Get GPT decoder block spec
+ rms_norm = config.normalization == "RMSNorm"
gpt_decoder_block_spec = TransformerBlockSubmodules(
layer_specs=layer_specs, layer_norm=backend.layer_norm(rms_norm=rms_norm, for_qk=False)
)
@@ -359,7 +401,7 @@ def _get_backend_spec_provider(config: TransformerConfig) -> BackendSpecProvider
)
backend: BackendSpecProvider = (
KitchenSpecProvider(
- fallback=TESpecProvider(),
+ fallback=TESpecProvider(fallback_to_eager_attn=config.fallback_to_eager_attn),
use_kitchen_attention=config.use_kitchen_attention,
kitchen_attention_backend=config.kitchen_attention_backend,
)
@@ -395,6 +437,7 @@ def _get_self_attention_module_spec(
qk_l2_norm=config.qk_l2_norm,
use_kitchen=config.use_kitchen,
use_te_activation_func=config.use_te_activation_func,
+ fallback_to_eager_attn=config.fallback_to_eager_attn,
use_kitchen_attention=config.use_kitchen_attention,
kitchen_attention_backend=config.kitchen_attention_backend,
mla_down_proj_fusion=getattr(config, "mla_down_proj_fusion", False),
diff --git a/megatron/core/models/gpt/fine_grained_callables.py b/megatron/core/models/gpt/fine_grained_callables.py
index 8d1036b5bae..1261f384b8b 100644
--- a/megatron/core/models/gpt/fine_grained_callables.py
+++ b/megatron/core/models/gpt/fine_grained_callables.py
@@ -478,18 +478,16 @@ def forward_func(
)
if not isinstance(layer.mlp, MoELayer):
return hidden_states, None, None, None
+ mlp_norm_manager = off_interface(layer.offload_mlp_norm, hidden_states, "mlp_norm")
+ node.layer_state.mlp_norm_manager = mlp_norm_manager
if layer.recompute_pre_mlp_layernorm:
layer.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput()
- with off_interface(
- layer.offload_mlp_norm, hidden_states, "mlp_norm"
- ) as hidden_states:
+ with mlp_norm_manager as hidden_states:
pre_mlp_layernorm_output = layer.pre_mlp_norm_checkpoint.checkpoint(
apply_module(layer.pre_mlp_layernorm), hidden_states
)
else:
- with off_interface(
- layer.offload_mlp_norm, hidden_states, "mlp_norm"
- ) as hidden_states:
+ with mlp_norm_manager as hidden_states:
pre_mlp_layernorm_output = apply_module(layer.pre_mlp_layernorm)(
hidden_states
)
@@ -591,10 +589,12 @@ def submodule_combine_forward(node: ScheduleNode, output: torch.Tensor):
)
# Delay the offload of the mlp norm until after the mlp_bda has been computed
# because the residual is needed in the mlp_bda.
- if layer.offload_mlp_norm:
- hidden_states = off_interface.group_commit(
- hidden_states, name="mlp_norm", forced_released_tensors=[residual]
+ mlp_norm_manager = getattr(node.layer_state, 'mlp_norm_manager', None)
+ if mlp_norm_manager is not None:
+ hidden_states = mlp_norm_manager.group_offload(
+ hidden_states, forced_released_tensors=[residual]
)
+ node.layer_state.mlp_norm_manager = None
output = make_viewless_tensor(
inp=hidden_states, requires_grad=hidden_states.requires_grad, keep_graph=True
)
diff --git a/megatron/core/models/gpt/gpt_layer_specs.py b/megatron/core/models/gpt/gpt_layer_specs.py
index 5e90f0b36be..7c108163f6e 100755
--- a/megatron/core/models/gpt/gpt_layer_specs.py
+++ b/megatron/core/models/gpt/gpt_layer_specs.py
@@ -1,4 +1,5 @@
-# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+import copy
import warnings
from typing import Optional, Union
@@ -12,6 +13,7 @@
from megatron.core.models.gpt.moe_module_specs import get_moe_module_spec_for_backend
from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules
from megatron.core.transformer.enums import AttnMaskType, LayerType
+from megatron.core.transformer.hyper_connection import HyperConnectionModule
from megatron.core.transformer.identity_op import IdentityOp
from megatron.core.transformer.mlp import MLP, MLPSubmodules
from megatron.core.transformer.multi_latent_attention import (
@@ -34,6 +36,7 @@
)
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.transformer_layer import (
+ HyperConnectionTransformerLayer,
TransformerLayer,
TransformerLayerSubmodules,
get_transformer_layer_offset,
@@ -54,7 +57,7 @@
HAVE_KITCHEN = False
try:
- import apex # type: ignore[import-untyped] # pylint: disable=unused-import
+ import apex # pylint: disable=unused-import
from megatron.core.fusions.fused_layer_norm import FusedLayerNorm
@@ -180,8 +183,10 @@ def get_gpt_layer_with_transformer_engine_submodules(
use_te_op_fuser: Optional[bool] = False,
use_kitchen: bool = False,
use_te_activation_func: bool = False,
+ fallback_to_eager_attn: bool = False,
use_kitchen_attention: bool = False,
kitchen_attention_backend: str = "sdpa",
+ enable_hyper_connection: bool = False,
mla_down_proj_fusion: bool = False,
) -> TransformerLayerSubmodules:
"""Use these submodules to use lower-level Transformer Engine modules (required for fp8
@@ -197,6 +202,8 @@ def get_gpt_layer_with_transformer_engine_submodules(
qk_l2_norm (bool, optional): To use l2 norm for queries/keys. Defaults to False.
use_te_op_fuser (bool, optional): Use Transformer Engine's operation-based API, which may
enable certain operation fusions. Defaults to False.
+ enable_hyper_connection (bool): Use HyperConnectionTransformerLayer with
+ HyperConnectionModule instead of plain TransformerLayer. Defaults to False.
mla_down_proj_fusion (bool, optional): Enable fused q/kv down-projection and fused input
layernorm when backend supports. Otherwise fall back
to the unfused MLA.
@@ -214,7 +221,7 @@ def get_gpt_layer_with_transformer_engine_submodules(
if use_kitchen:
assert HAVE_KITCHEN
backend: BackendSpecProvider = KitchenSpecProvider(
- fallback=TESpecProvider(),
+ fallback=TESpecProvider(fallback_to_eager_attn=fallback_to_eager_attn),
use_kitchen_attention=use_kitchen_attention,
kitchen_attention_backend=kitchen_attention_backend,
)
@@ -223,7 +230,7 @@ def get_gpt_layer_with_transformer_engine_submodules(
if use_te_activation_func:
raise AssertionError("use_te_activation_func not compatible with using kitchen.")
else:
- backend = TESpecProvider()
+ backend = TESpecProvider(fallback_to_eager_attn=fallback_to_eager_attn)
mlp = get_mlp_module_spec_for_backend(
backend=backend,
@@ -233,6 +240,8 @@ def get_gpt_layer_with_transformer_engine_submodules(
use_te_activation_func=use_te_activation_func,
)
+ hc_module = HyperConnectionModule if enable_hyper_connection else IdentityOp
+
if multi_latent_attention:
assert qk_l2_norm is False, "qk_l2_norm is not supported with MLA."
linear_q_up_proj = (
@@ -302,9 +311,11 @@ def get_gpt_layer_with_transformer_engine_submodules(
),
),
self_attn_bda=get_bias_dropout_add,
+ self_attention_hyper_connection=hc_module,
pre_mlp_layernorm=backend.layer_norm(has_residual=True) if num_experts else IdentityOp,
mlp=mlp,
mlp_bda=get_bias_dropout_add,
+ mlp_hyper_connection=hc_module,
)
else:
qk_norm = backend.layer_norm(for_qk=True)
@@ -325,9 +336,11 @@ def get_gpt_layer_with_transformer_engine_submodules(
),
),
self_attn_bda=get_bias_dropout_add,
+ self_attention_hyper_connection=hc_module,
pre_mlp_layernorm=backend.layer_norm(has_residual=True) if num_experts else IdentityOp,
mlp=mlp,
mlp_bda=get_bias_dropout_add,
+ mlp_hyper_connection=hc_module,
sharded_state_dict_keys_map={
"mlp.0.weight": "mlp.linear_fc1.layer_norm_weight",
"mlp.0.bias": "mlp.linear_fc1.layer_norm_bias",
@@ -342,8 +355,10 @@ def get_gpt_layer_with_transformer_engine_submodules(
@copy_signature(get_gpt_layer_with_transformer_engine_submodules)
def get_gpt_layer_with_transformer_engine_spec(*args, **kwargs) -> ModuleSpec:
"""Use this spec to use lower-level Transformer Engine modules (required for fp8 training)."""
+ enable_hc = kwargs.get('enable_hyper_connection', False)
+ layer_module = HyperConnectionTransformerLayer if enable_hc else TransformerLayer
return ModuleSpec(
- module=TransformerLayer,
+ module=layer_module,
submodules=get_gpt_layer_with_transformer_engine_submodules(*args, **kwargs),
)
@@ -359,6 +374,7 @@ def get_gpt_layer_local_submodules(
use_kitchen: bool = False,
use_kitchen_attention: bool = False,
kitchen_attention_backend: str = "sdpa",
+ enable_hyper_connection: bool = False,
) -> TransformerLayerSubmodules:
"""Use these submodules for an implementation using only modules in Megatron-Core.
@@ -370,6 +386,8 @@ def get_gpt_layer_local_submodules(
multi_latent_attention (bool, optional): To use MLA. Defaults to False.
fp8 (str, optional): Deprecated. For temporary Nemo compatibility.
qk_l2_norm (bool, optional): To use l2 norm for queries/keys. Defaults to False.
+ enable_hyper_connection (bool): Use HyperConnectionTransformerLayer with
+ HyperConnectionModule instead of plain TransformerLayer. Defaults to False.
Returns:
TransformerLayerSubmodules: Megatron-Core modules to construct a TransformerLayer
@@ -402,6 +420,8 @@ def get_gpt_layer_local_submodules(
backend=backend, num_experts=num_experts, moe_grouped_gemm=moe_grouped_gemm
)
+ hc_module = HyperConnectionModule if enable_hyper_connection else IdentityOp
+
if multi_latent_attention:
assert qk_l2_norm is False, "qk_l2_norm is not supported with MLA."
return TransformerLayerSubmodules(
@@ -422,9 +442,11 @@ def get_gpt_layer_local_submodules(
),
),
self_attn_bda=get_bias_dropout_add,
+ self_attention_hyper_connection=hc_module,
pre_mlp_layernorm=layer_norm,
mlp=mlp,
mlp_bda=get_bias_dropout_add,
+ mlp_hyper_connection=hc_module,
)
else:
return TransformerLayerSubmodules(
@@ -445,9 +467,11 @@ def get_gpt_layer_local_submodules(
),
),
self_attn_bda=get_bias_dropout_add,
+ self_attention_hyper_connection=hc_module,
pre_mlp_layernorm=layer_norm,
mlp=mlp,
mlp_bda=get_bias_dropout_add,
+ mlp_hyper_connection=hc_module,
sharded_state_dict_keys_map={
"input_layernorm.": "self_attention.linear_qkv.layer_norm_",
"pre_mlp_layernorm.": "mlp.linear_fc1.layer_norm_",
@@ -458,8 +482,10 @@ def get_gpt_layer_local_submodules(
@copy_signature(get_gpt_layer_local_submodules)
def get_gpt_layer_local_spec(*args, **kwargs) -> ModuleSpec:
"""Use this spec for an implementation using only modules in Megatron-Core."""
+ enable_hc = kwargs.get('enable_hyper_connection', False)
+ layer_module = HyperConnectionTransformerLayer if enable_hc else TransformerLayer
return ModuleSpec(
- module=TransformerLayer, submodules=get_gpt_layer_local_submodules(*args, **kwargs)
+ module=layer_module, submodules=get_gpt_layer_local_submodules(*args, **kwargs)
)
@@ -551,12 +577,14 @@ def get_gpt_decoder_layer_specs(
use_transformer_engine: bool,
normalization: Optional[str] = None,
qk_l2_norm: Optional[bool] = False,
- vp_stage: Optional[int] = None,
- pp_rank: Optional[int] = None,
) -> TransformerBlockSubmodules:
"""GPT block spec."""
+ assert config.experimental_attention_variant is None, (
+ "Experimental attention variant is not supported with get_gpt_decoder_layer_specs, "
+ f"but got {config.experimental_attention_variant=}."
+ )
+
if use_transformer_engine:
- layer_norm_impl = TENorm
dense_layer_spec = get_gpt_layer_with_transformer_engine_spec(
num_experts=None,
moe_grouped_gemm=False,
@@ -565,6 +593,7 @@ def get_gpt_decoder_layer_specs(
qk_l2_norm=qk_l2_norm,
use_kitchen=config.use_kitchen,
use_te_activation_func=config.use_te_activation_func,
+ enable_hyper_connection=config.enable_hyper_connections,
use_kitchen_attention=config.use_kitchen_attention,
kitchen_attention_backend=config.kitchen_attention_backend,
mla_down_proj_fusion=getattr(config, "mla_down_proj_fusion", False),
@@ -577,6 +606,7 @@ def get_gpt_decoder_layer_specs(
qk_l2_norm=qk_l2_norm,
use_kitchen=config.use_kitchen,
use_te_activation_func=config.use_te_activation_func,
+ enable_hyper_connection=config.enable_hyper_connections,
use_kitchen_attention=config.use_kitchen_attention,
kitchen_attention_backend=config.kitchen_attention_backend,
mla_down_proj_fusion=getattr(config, "mla_down_proj_fusion", False),
@@ -597,7 +627,6 @@ def get_gpt_decoder_layer_specs(
moe_use_legacy_grouped_gemm=config.moe_use_legacy_grouped_gemm,
)
else:
- layer_norm_impl = LNImpl
dense_layer_spec = get_gpt_layer_local_spec(
num_experts=None,
moe_grouped_gemm=False,
@@ -606,8 +635,7 @@ def get_gpt_decoder_layer_specs(
normalization=normalization,
qk_l2_norm=qk_l2_norm,
use_kitchen=config.use_kitchen,
- use_kitchen_attention=config.use_kitchen_attention,
- kitchen_attention_backend=config.kitchen_attention_backend,
+ enable_hyper_connection=config.enable_hyper_connections,
)
moe_layer_spec = get_gpt_layer_local_spec(
num_experts=config.num_moe_experts,
@@ -617,8 +645,7 @@ def get_gpt_decoder_layer_specs(
normalization=normalization,
qk_l2_norm=qk_l2_norm,
use_kitchen=config.use_kitchen,
- use_kitchen_attention=config.use_kitchen_attention,
- kitchen_attention_backend=config.kitchen_attention_backend,
+ enable_hyper_connection=config.enable_hyper_connections,
)
# Parse config.moe_layer_freq to determine the pattern of expert/dense layers.
@@ -666,13 +693,16 @@ def get_gpt_decoder_block_spec(
layer_specs = get_gpt_decoder_layer_specs(
config, use_transformer_engine, normalization, qk_l2_norm
)
+
# Slice the layer specs to only include the layers that are built in this pipeline stage.
# Note: MCore layer_number starts at 1
num_layers_to_build = get_num_layers_to_build(config, vp_stage=vp_stage, pp_rank=pp_rank)
if config.pipeline_model_parallel_layout is not None:
layout = config.pipeline_model_parallel_layout
- assert isinstance(layout, PipelineParallelLayerLayout)
+ assert isinstance(
+ layout, PipelineParallelLayerLayout
+ ), f"Invalid pipeline model parallel layout: {layout}"
local_layer_specs = [
layer_specs[layer_id]
for layer_id in layout.get_layer_id_list(
@@ -683,13 +713,13 @@ def get_gpt_decoder_block_spec(
offset = get_transformer_layer_offset(config, vp_stage=vp_stage, pp_rank=pp_rank)
local_layer_specs = layer_specs[offset : offset + num_layers_to_build]
+ # Block spec.
if use_transformer_engine:
layer_norm_impl = TENorm
elif config.transformer_impl == "inference_optimized":
layer_norm_impl = TENorm
else:
layer_norm_impl = LNImpl
- # Block spec.
block_spec = TransformerBlockSubmodules(
layer_specs=local_layer_specs, layer_norm=layer_norm_impl
)
@@ -706,22 +736,17 @@ def get_gpt_mtp_block_spec(
) -> MultiTokenPredictionBlockSubmodules:
"""GPT Multi-Token Prediction (MTP) block spec."""
if use_transformer_engine:
- backend: BackendSpecProvider = (
- KitchenSpecProvider(
- fallback=TESpecProvider(),
+ if config.use_kitchen:
+ backend: BackendSpecProvider = KitchenSpecProvider(
+ fallback=TESpecProvider(fallback_to_eager_attn=config.fallback_to_eager_attn),
use_kitchen_attention=config.use_kitchen_attention,
kitchen_attention_backend=config.kitchen_attention_backend,
)
- if config.use_kitchen
- else TESpecProvider()
- )
+ else:
+ backend = TESpecProvider(fallback_to_eager_attn=config.fallback_to_eager_attn)
else:
backend = (
- KitchenSpecProvider(
- fallback=LocalSpecProvider(),
- use_kitchen_attention=config.use_kitchen_attention,
- kitchen_attention_backend=config.kitchen_attention_backend,
- )
+ KitchenSpecProvider(fallback=LocalSpecProvider())
if config.use_kitchen
else LocalSpecProvider()
)
@@ -744,12 +769,22 @@ def get_gpt_mtp_block_spec_for_backend(
if isinstance(spec, TransformerBlockSubmodules):
# get the spec for the last layer of decoder block
- transformer_layer_spec = spec.layer_specs[-1]
- elif isinstance(spec, ModuleSpec) and spec.module == TransformerLayer:
- transformer_layer_spec = spec
+ transformer_layer_spec = copy.copy(spec.layer_specs[-1])
+ elif isinstance(spec, ModuleSpec) and issubclass(spec.module, TransformerLayer):
+ transformer_layer_spec = copy.copy(spec)
else:
raise ValueError(f"Invalid spec: {spec}")
+ transformer_layer_spec.submodules = copy.copy(transformer_layer_spec.submodules)
+
+ # MTP does not support hyper connections yet; strip HC modules and
+ # downgrade the layer class to plain TransformerLayer.
+ transformer_layer_spec.submodules.self_attention_hyper_connection = IdentityOp
+ transformer_layer_spec.submodules.cross_attention_hyper_connection = IdentityOp
+ transformer_layer_spec.submodules.mlp_hyper_connection = IdentityOp
+ if transformer_layer_spec.module is HyperConnectionTransformerLayer:
+ transformer_layer_spec.module = TransformerLayer
+
mtp_layer_spec = get_mtp_layer_spec_for_backend(
mtp_model_layer_spec=transformer_layer_spec, backend=backend
)
diff --git a/megatron/core/models/gpt/gpt_model.py b/megatron/core/models/gpt/gpt_model.py
index d63b2c1ddfa..d6496db09fd 100644
--- a/megatron/core/models/gpt/gpt_model.py
+++ b/megatron/core/models/gpt/gpt_model.py
@@ -25,6 +25,7 @@
from megatron.core.quantization.utils import get_quant_config_or_none
from megatron.core.tensor_parallel import gather_from_sequence_parallel_region
from megatron.core.transformer.enums import CudaGraphScope, ModelType
+from megatron.core.transformer.linear_cross_entropy import LinearCrossEntropyModule
from megatron.core.transformer.multi_token_prediction import (
MultiTokenPredictionBlock,
mtp_on_this_rank,
@@ -146,6 +147,11 @@ def __init__(
self.config, ignore_virtual=False, vp_stage=vp_stage
)
+ self.fuse_linear_cross_entropy = (
+ self.config.cross_entropy_loss_fusion
+ and self.config.cross_entropy_fusion_impl == "linear"
+ )
+
if self.pre_process or self.mtp_process:
self.embedding = LanguageModelEmbedding(
config=self.config,
@@ -241,7 +247,7 @@ def __init__(
self.embedding_activation_buffer = None
self.grad_output_buffer = None
- self.output_layer = tensor_parallel.ColumnParallelLinear(
+ self.output_layer = LinearCrossEntropyModule(
config.hidden_size,
self.vocab_size,
config=config,
@@ -459,19 +465,22 @@ def _preprocess(
def preprocess_for_fine_grained_offloading(self):
"""Preprocess for fine-grained activation offloading."""
off_interface.init_chunk_handler(
+ pp_rank=self.pg_collection.pp.rank(),
vp_size=self.config.virtual_pipeline_model_parallel_size,
vp_stage=self.vp_stage,
min_offloaded_tensor_size=self.config.min_offloaded_tensor_size,
+ delta_offload_bytes_across_pp_ranks=self.config.delta_offload_bytes_across_pp_ranks,
+ activation_offload_fraction=self.config.activation_offload_fraction,
)
if self.disable_param_offloading:
for param in self.decoder.parameters():
- off_interface.mark_not_offloadable(param)
+ off_interface.mark_not_offload(param)
if self.mtp_process:
for param in self.mtp.parameters():
- off_interface.mark_not_offloadable(param)
+ off_interface.mark_not_offload(param)
if self.post_process:
for param in self.output_layer.parameters():
- off_interface.mark_not_offloadable(param)
+ off_interface.mark_not_offload(param)
self.disable_param_offloading = False
def forward(
@@ -674,9 +683,12 @@ def _postprocess(
reshaped = hidden_states.squeeze(1).unsqueeze(0)
hidden_states = inference_context.last_token_logits(reshaped).unsqueeze(1)
- logits, _ = self.output_layer(
- hidden_states, weight=output_weight, runtime_gather_output=runtime_gather_output
- )
+ if has_config_logger_enabled(self.config) or labels is None:
+ logits, _ = self.output_layer(
+ hidden_states, weight=output_weight, runtime_gather_output=runtime_gather_output
+ )
+ else:
+ logits = None
# Apply MuP output scaling to logits
logits = self._scale_logits(logits)
@@ -706,7 +718,18 @@ def _postprocess(
# [s b h] => [b s h]
return logits.transpose(0, 1).contiguous()
- loss = self.compute_language_model_loss(labels, logits)
+ output_layer_kwargs = dict(
+ input_=hidden_states, weight=output_weight, runtime_gather_output=runtime_gather_output
+ )
+ if self.fuse_linear_cross_entropy:
+ loss = self.output_layer(
+ output_cross_entropy_loss=self.fuse_linear_cross_entropy,
+ labels=labels,
+ **output_layer_kwargs,
+ )
+ else:
+ logits, _ = self.output_layer(**output_layer_kwargs)
+ loss = self.compute_language_model_loss(labels, logits)
return loss
diff --git a/megatron/core/models/mamba/mamba_model.py b/megatron/core/models/mamba/mamba_model.py
index e295c3d6b01..ae7a00eb1a3 100644
--- a/megatron/core/models/mamba/mamba_model.py
+++ b/megatron/core/models/mamba/mamba_model.py
@@ -204,6 +204,11 @@ def __init__(
# TODO: remove this dependency ?
self.model_type = ModelType.encoder_or_decoder
+ self.fuse_linear_cross_entropy = (
+ self.config.cross_entropy_loss_fusion
+ and self.config.cross_entropy_fusion_impl == "linear"
+ )
+
if self.pre_process or self.mtp_process:
self.embedding = LanguageModelEmbedding(
config=self.config,
@@ -471,7 +476,18 @@ def forward(
# [s b h] => [b s h]
return logits.transpose(0, 1).contiguous()
- loss = self.compute_language_model_loss(labels, logits)
+ output_layer_kwargs = dict(
+ input_=hidden_states, weight=output_weight, runtime_gather_output=runtime_gather_output
+ )
+ if self.fuse_linear_cross_entropy:
+ loss = self.output_layer(
+ output_cross_entropy_loss=self.fuse_linear_cross_entropy,
+ labels=labels,
+ **output_layer_kwargs,
+ )
+ else:
+ logits, _ = self.output_layer(**output_layer_kwargs)
+ loss = self.compute_language_model_loss(labels, logits)
return loss
diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py
index c32ed77d1d7..b64c871104d 100644
--- a/megatron/core/optimizer/__init__.py
+++ b/megatron/core/optimizer/__init__.py
@@ -2,6 +2,7 @@
import copy
import logging
import warnings
+from collections import defaultdict
from dataclasses import astuple
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
@@ -54,7 +55,13 @@
from ..transformer.module import MegatronModule
from ..utils import get_model_config, get_pg_rank, get_pg_size, is_te_min_version, log_single_rank
from .distrib_optimizer import DistributedOptimizer
+from .emerging_optimizers import (
+ _EMERGING_OPTIMIZERS,
+ HAVE_EMERGING_OPTIMIZERS,
+ _create_emerging_optimizer,
+)
from .grad_scaler import ConstantGradScaler, DynamicGradScaler
+from .layer_wise_optimizer import LayerWiseDistributedOptimizer
from .optimizer import (
ChainedOptimizer,
Float16OptimizerWithFloat16Params,
@@ -62,6 +69,8 @@
MegatronOptimizer,
param_group_identifier_keys,
)
+
+# Subclass aliases kept for backward compatibility; all are OptimizerConfig.
from .optimizer_config import (
AdamOptimizerConfig,
OptimizerConfig,
@@ -310,14 +319,6 @@ def _get_param_groups(
# Map (pg_overrides, is_expert_parallel) to params.
params_map = {}
- if config_overrides is None:
- # TODO remove this default behavior eventually.
- # This is only needed for backwards compatibility with the old config overrides API where
- # the config_overrides argument by default lead to bias parameters and length 1 parameters.
- # We assume that users of decoupled LR already provide config overrides so will adapt
- # to the new API.
- config_overrides = get_standard_config_overrides(config=config)
-
for model_chunk in model_chunks:
for name, param in model_chunk.named_parameters():
if not param.requires_grad:
@@ -452,7 +453,8 @@ def _get_megatron_optimizer_based_on_param_groups(
intra_dist_opt_group: Optional[torch.distributed.ProcessGroup] = None,
distributed_optimizer_instance_id: Optional[int] = 0,
pg_collection: Optional[ProcessGroupCollection] = None,
-) -> MegatronOptimizer:
+ skip_megatron_wrapping: bool = False,
+) -> Union[MegatronOptimizer, Tuple[Optional[torch.optim.Optimizer], Optional[Callable]]]:
"""Get Megatron optimizer based on parameter groups.
Args:
@@ -468,12 +470,24 @@ def _get_megatron_optimizer_based_on_param_groups(
optimizer. Defaults to None.
distributed_optimizer_instance_id (int, optional): Distributed optimizer instance. Defaults
0.
+ skip_megatron_wrapping (bool): if True, return a
+ ``(optimizer, init_state_fn)`` tuple of the raw PyTorch optimizer
+ without any Megatron wrapping. Useful when the caller
+ (e.g. LayerWiseDistributedOptimizer) performs its own wrapping.
Returns:
- Instance of MegatronOptimizer.
+ Instance of MegatronOptimizer, or ``(optimizer, init_state_fn)`` when
+ *skip_megatron_wrapping=True*.
"""
- # TODO: Logic needs to be updated to handle different optimizer types (i.e., param_groups
- # passed into this function need to correspond to the same optimizer).
+ # All param_groups passed here must belong to the same optimizer type (adam / sgd).
+ # Callers are responsible for splitting by optimizer type before calling this function.
+
+ if skip_megatron_wrapping and config.use_precision_aware_optimizer:
+ raise ValueError(
+ "skip_megatron_wrapping=True is incompatible with use_precision_aware_optimizer."
+ )
+ if skip_megatron_wrapping and config.optimizer_cpu_offload:
+ raise ValueError("skip_megatron_wrapping=True is incompatible with optimizer_cpu_offload.")
# When freezing sub-models we may have no trainable parameters on a rank and
# hence an empty param_groups. However, we still need to create an optimizer
@@ -607,6 +621,9 @@ def init_state_fn(opt, config=None):
optimizer = None
init_state_fn = None
+ if skip_megatron_wrapping:
+ return optimizer, init_state_fn
+
# Mixed precision optimizer.
# - Note: both the Float16Optimizer and the DistributedOptimizer inherit
# from the MixedPrecisionOptimizer, which manages any optimizer where
@@ -697,6 +714,141 @@ def check_config_overrides_consistency(
return True
+def _get_megatron_emerging_optimizer(
+ config: OptimizerConfig,
+ model_chunks: List[MegatronModule],
+ config_overrides: Optional[Dict[ParamKey, Any]] = None,
+ pg_collection: Optional[ProcessGroupCollection] = None,
+) -> MegatronOptimizer:
+ """Build an emerging optimizer (e.g. Muon) for the given model chunks.
+
+ Parameter separation (e.g., linear weights -> Muon, rest -> Adam) is expressed as a
+ config_override, the same mechanism used for weight-decay and learning-rate overrides.
+ Adam/SGD groups are delegated to _get_megatron_optimizer_based_on_param_groups so they
+ go through the exact same code path as the standard optimizer factory.
+
+ When ``config.use_layer_wise_distributed_optimizer`` is True, the underlying optimizers
+ are wrapped with :class:`LayerWiseDistributedOptimizer`.
+ """
+ eopt_name = config.optimizer
+ use_layer_wise = config.use_layer_wise_distributed_optimizer
+
+ # Handle legacy "dist_*" optimizer names (e.g. "dist_muon" β "muon" + layer-wise).
+ if eopt_name.startswith('dist_'):
+ bare_name = eopt_name[len('dist_') :]
+ warnings.warn(
+ f"optimizer='{eopt_name}' is deprecated. "
+ f"Use optimizer='{bare_name}' with use_layer_wise_distributed_optimizer=True.",
+ DeprecationWarning,
+ stacklevel=3,
+ )
+ eopt_name = bare_name
+ use_layer_wise = True
+
+ if not HAVE_EMERGING_OPTIMIZERS:
+ raise ImportError(
+ f"emerging-optimizers package is required for optimizer='{eopt_name}'. "
+ "Install it with: pip install emerging-optimizers"
+ )
+ if eopt_name not in _EMERGING_OPTIMIZERS:
+ raise ValueError(f"Unsupported emerging optimizer: {eopt_name}")
+ if config.fp16:
+ raise ValueError('emerging optimizer with fp16 is not supported.')
+
+ if pg_collection is None:
+ pg_collection = ProcessGroupCollection.use_mpu_process_groups()
+
+ log_single_rank(logger, logging.INFO, f'Setting up emerging optimizer with config {config}')
+
+ # Tag parameters with optimizer-specific attributes (expert_tp, is_qkv).
+ for model_chunk in model_chunks:
+ for name, param in model_chunk.named_parameters():
+ if not param.requires_grad:
+ continue
+ if 'experts' in name and 'shared' not in name:
+ param.expert_tp = True
+ # TODO(deyuf): support MLA
+ if 'linear_qkv.weight' in name and len(param.shape) == 2:
+ param.is_qkv = True
+
+ # Apply optimizer-specific default param overrides (e.g. muon: non-linear -> adam).
+ config_overrides.update(_EMERGING_OPTIMIZERS[eopt_name].default_param_overrides)
+
+ # Build param groups and bucket by (optimizer_name, is_expert_parallel).
+ # Layer-wise distributed optimizer handles expert params internally so we skip that split.
+ all_param_groups = _get_param_groups(model_chunks, config, config_overrides)
+ grouped_param_groups = defaultdict(list)
+ for group in all_param_groups:
+ opt_name = group.get('optimizer', eopt_name)
+ is_expert = group['is_expert_parallel'] and not use_layer_wise
+ grouped_param_groups[(opt_name, is_expert)].append(group)
+
+ # Build an optimizer for each (optimizer_name, is_expert) bucket and combine.
+ results = []
+ for (opt_name, is_expert), groups in grouped_param_groups.items():
+ if not groups:
+ continue
+
+ model_parallel_group = pg_collection.tp_ep_pp if is_expert else pg_collection.mp
+
+ if opt_name in _EMERGING_OPTIMIZERS:
+ optimizer, init_state_fn = _create_emerging_optimizer(
+ config, groups, eopt_name, model_chunks, pg_collection
+ )
+ if use_layer_wise:
+ result = (optimizer, init_state_fn)
+ else:
+ if config.bf16:
+ optimizer = Float16OptimizerWithFloat16Params(
+ optimizer, config, None, init_state_fn
+ )
+ else:
+ optimizer = FP32Optimizer(optimizer, config, init_state_fn)
+ setattr(optimizer, 'grad_stats_parallel_group', model_parallel_group)
+ if pg_collection is None or not hasattr(pg_collection, 'tp'):
+ tp_group = parallel_state.get_tensor_model_parallel_group()
+ else:
+ tp_group = pg_collection.tp
+ setattr(optimizer, 'tp_group', tp_group)
+ result = optimizer
+ else:
+ fallback_config = copy.copy(config)
+ fallback_config.optimizer = opt_name
+ fallback_config.use_distributed_optimizer = False
+ result = _get_megatron_optimizer_based_on_param_groups(
+ config=fallback_config,
+ model_chunks=model_chunks,
+ param_groups=groups,
+ model_parallel_group=model_parallel_group,
+ pg_collection=pg_collection,
+ skip_megatron_wrapping=use_layer_wise,
+ )
+ # TODO(deyuf): ChainedOptimizer currently asserts all sub-optimizers
+ # share the same config. Revisit this design now that emerging
+ # optimizers mix different optimizer types (e.g. Muon + Adam).
+ # For now, reset to the top-level config so the assertion holds.
+ if not use_layer_wise and hasattr(result, 'config'):
+ result.config = config
+ results.append(result)
+
+ if use_layer_wise:
+ base_optimizers, init_fns = (), ()
+ if results:
+ base_optimizers, init_fns = zip(*results)
+ log_single_rank(
+ logger, logging.INFO, f'Using LayerWiseDistributedOptimizer for {eopt_name}'
+ )
+ return LayerWiseDistributedOptimizer(
+ list(base_optimizers),
+ config,
+ pg_collection,
+ init_state_fn_list=list(init_fns),
+ model_chunks=model_chunks,
+ )
+
+ return ChainedOptimizer(results)
+
+
def get_megatron_optimizer(
config: OptimizerConfig,
model_chunks: List[MegatronModule],
@@ -707,7 +859,10 @@ def get_megatron_optimizer(
) -> MegatronOptimizer:
"""Retrieve the Megatron optimizer for model chunks.
+ Handles both standard optimizers (Adam, SGD) and emerging optimizers (e.g. Muon).
We use separate optimizers for expert parameters and non-expert parameters.
+ For emerging optimizers with ``config.use_layer_wise_distributed_optimizer=True``,
+ the optimizer is automatically wrapped with :class:`LayerWiseDistributedOptimizer`.
Args:
config (OptimizerConfig): optimizer configuration object.
@@ -724,10 +879,25 @@ def get_megatron_optimizer(
Instance of MegatronOptimizer.
"""
- log_single_rank(logger, logging.INFO, f'Setting up optimizer with config {config}')
+ # None → apply standard defaults. To extend defaults with custom overrides,
+ # start from get_standard_config_overrides(config) and merge yours in.
+ if config_overrides is None:
+ config_overrides = get_standard_config_overrides(config)
check_config_overrides_consistency(config, config_overrides)
+ # TODO: the standard and emerging optimizer paths handle pg_collection differently;
+ # unify them so both use a single pg_collection-based flow.
+ if config.optimizer not in ('adam', 'sgd'):
+ return _get_megatron_emerging_optimizer(
+ config=config,
+ model_chunks=model_chunks,
+ config_overrides=config_overrides,
+ pg_collection=pg_collection,
+ )
+
+ log_single_rank(logger, logging.INFO, f'Setting up optimizer with config {config}')
+
# Separate out first model chunk if overlapping param AG with optimizer step.
if config.overlap_param_gather_with_optimizer_step:
all_dense_model_chunks = [[model_chunks[0]], model_chunks[1:]]
diff --git a/megatron/core/optimizer/cpu_offloading/optimizer_state_offloader.py b/megatron/core/optimizer/cpu_offloading/optimizer_state_offloader.py
new file mode 100644
index 00000000000..81fd116c8ba
--- /dev/null
+++ b/megatron/core/optimizer/cpu_offloading/optimizer_state_offloader.py
@@ -0,0 +1,315 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+
+"""Optimizer state offloading class."""
+
+from typing import TYPE_CHECKING, Dict, List, Tuple
+
+import torch
+
+if TYPE_CHECKING:
+ from megatron.core.optimizer.distrib_optimizer import DistributedOptimizer
+
+
+class OptimizerStateOffloader:
+ """
+ Manages offloading of optimizer states and master weights to CPU.
+ Used with DistributedOptimizer to reduce GPU memory usage.
+
+ Supports overlapped D2H/H2D transfers using CUDA streams.
+
+ Master weights can be stored in two locations:
+ - In adam optimizer state (when use_precision_aware_optimizer_no_fp8_or_ds_fp8 is True)
+ - In mcore's shard_fp32_from_float16_groups
+ """
+
+ OPTIMIZER_STATE_KEYS = ('exp_avg', 'exp_avg_sq')
+ MASTER_WEIGHT_KEY = 'master_param'
+
+ def __init__(self, distrib_optimizer: "DistributedOptimizer"):
+ """
+ Args:
+ distrib_optimizer: The DistributedOptimizer to offload states and master weights from.
+ """
+ self.dist_optimizer = distrib_optimizer
+ self.adam_optimizer = distrib_optimizer.optimizer
+
+ # Only support TE FusedAdam optimizer for now.
+ try:
+ from transformer_engine.pytorch.optimizers import FusedAdam
+
+ assert isinstance(self.adam_optimizer, FusedAdam), (
+ f"OptimizerStateOffloader requires TE FusedAdam optimizer, "
+ f"but got {type(self.adam_optimizer).__name__}"
+ )
+ except ImportError:
+ raise ImportError(
+ "OptimizerStateOffloader requires transformer_engine.pytorch.optimizers.FusedAdam"
+ )
+
+ # Check if master weights are stored in adam optimizer state
+ self.optimizer_contains_master_weights = self.adam_optimizer.master_weights
+
+ # CUDA streams for async transfers
+ self._d2h_stream = torch.cuda.Stream()
+ self._h2d_stream = torch.cuda.Stream()
+
+ # CPU buffers for optimizer states: {param: {key: cpu_tensor}}
+ self._opt_state_cpu_buffers: Dict[torch.Tensor, Dict[str, torch.Tensor]] = {}
+
+ # CPU buffers for mcore master weights, matching the structure of source groups
+ # List[List[cpu_tensor]]
+ self._shard_fp32_from_float16_cpu_buffers: List[List[torch.Tensor]] = []
+
+ # State tracking
+ self._offloaded = False
+ self._offloaded_state_keys: Tuple[str, ...] = ()
+ self._offloaded_mcore_master_weights = False
+
+ # Track whether optimizer states (exp_avg, exp_avg_sq) have been initialized.
+ # These are lazily initialized by FusedAdam during the first optimizer.step().
+ # Master weights (shard_fp32_from_float16_groups) are available from the start.
+ self._optimizer_states_initialized = False
+
+ def mark_optimizer_states_initialized(self):
+ """
+ Mark that optimizer states (exp_avg, exp_avg_sq) are now available.
+ Should be called after the first optimizer.step() completes.
+ """
+ self._optimizer_states_initialized = True
+
+ def _get_state_keys_to_offload(
+ self, offload_optimizer_states: bool, offload_master_weights: bool
+ ) -> Tuple[str, ...]:
+ """Get the state keys in FusedAdam to offload based on configuration."""
+ keys = []
+ # Skip optimizer states offloading if they haven't been initialized yet.
+ # Optimizer states are lazily initialized by FusedAdam during the first optimizer.step().
+ if self._optimizer_states_initialized:
+ if offload_optimizer_states:
+ keys.extend(self.OPTIMIZER_STATE_KEYS)
+ if offload_master_weights and self.optimizer_contains_master_weights:
+ keys.append(self.MASTER_WEIGHT_KEY)
+ return tuple(keys)
+
+ def _ensure_state_cpu_buffer(
+ self, param: torch.Tensor, state_key: str, gpu_tensor: torch.Tensor, pin_memory: bool = True
+ ) -> torch.Tensor:
+ """Get or create a CPU buffer for a state tensor."""
+ if param not in self._opt_state_cpu_buffers:
+ self._opt_state_cpu_buffers[param] = {}
+
+ if state_key not in self._opt_state_cpu_buffers[param]:
+ cpu_buffer = torch.empty(
+ gpu_tensor.size(),
+ dtype=gpu_tensor.dtype,
+ layout=gpu_tensor.layout,
+ device='cpu',
+ pin_memory=pin_memory,
+ )
+ self._opt_state_cpu_buffers[param][state_key] = cpu_buffer
+
+ return self._opt_state_cpu_buffers[param][state_key]
+
+ def _offload_shard_groups(
+ self,
+ shard_groups: List[List[torch.Tensor]],
+ cpu_buffers: List[List[torch.Tensor]],
+ pin_memory: bool = True,
+ ):
+ """Offload a shard group to CPU buffers."""
+ # Initialize CPU buffers on first call
+ if len(cpu_buffers) == 0:
+ for group in shard_groups:
+ group_buffers = []
+ for gpu_tensor in group:
+ cpu_buffer = torch.empty(
+ gpu_tensor.size(),
+ dtype=gpu_tensor.dtype,
+ layout=gpu_tensor.layout,
+ device='cpu',
+ pin_memory=pin_memory,
+ )
+ group_buffers.append(cpu_buffer)
+ cpu_buffers.append(group_buffers)
+
+ # Copy D2H
+ for group_idx, group in enumerate(shard_groups):
+ for param_idx, gpu_tensor in enumerate(group):
+ cpu_buffer = cpu_buffers[group_idx][param_idx]
+ cpu_buffer.copy_(gpu_tensor, non_blocking=pin_memory)
+ gpu_tensor.record_stream(self._d2h_stream)
+
+ def _offload_states(
+ self,
+ offload_optimizer_states: bool,
+ offload_master_weights: bool,
+ use_pin_memory: bool = True,
+ ):
+ """Offload optimizer states and/or master weights to CPU."""
+ # Offload states from adam optimizer
+ self._offloaded_state_keys = self._get_state_keys_to_offload(
+ offload_optimizer_states, offload_master_weights
+ )
+ states = self.adam_optimizer.state
+
+ for param, param_state in states.items():
+ for state_key in self._offloaded_state_keys:
+ if state_key not in param_state:
+ continue
+
+ gpu_tensor = param_state[state_key]
+ if not isinstance(gpu_tensor, torch.Tensor) or not gpu_tensor.is_cuda:
+ continue
+
+ cpu_buffer = self._ensure_state_cpu_buffer(
+ param, state_key, gpu_tensor, use_pin_memory
+ )
+ cpu_buffer.copy_(gpu_tensor, non_blocking=use_pin_memory)
+ gpu_tensor.record_stream(self._d2h_stream)
+
+ # Offload mcore master weights if not in optimizer state
+ if offload_master_weights and not self.optimizer_contains_master_weights:
+ self._offload_shard_groups(
+ self.dist_optimizer.shard_fp32_from_float16_groups,
+ self._shard_fp32_from_float16_cpu_buffers,
+ use_pin_memory,
+ )
+ self._offloaded_mcore_master_weights = True
+
+ def _release_states(self):
+ """Replace optimizer state GPU tensors with CPU tensors to free GPU memory."""
+ states = self.adam_optimizer.state
+
+ for param, param_state in states.items():
+ if param not in self._opt_state_cpu_buffers:
+ continue
+
+ for state_key in self._offloaded_state_keys:
+ if state_key not in self._opt_state_cpu_buffers[param]:
+ continue
+
+ param_state[state_key].untyped_storage().resize_(0)
+
+ if self._offloaded_mcore_master_weights:
+ for group in self.dist_optimizer.shard_fp32_from_float16_groups:
+ for gpu_tensor in group:
+ gpu_tensor.untyped_storage().resize_(0)
+
+ def _reload_shard_groups(
+ self,
+ shard_groups: List[List[torch.Tensor]],
+ cpu_buffers: List[List[torch.Tensor]],
+ is_allocate_stage: bool,
+ ):
+ """Reload shard groups from CPU to GPU."""
+ for group_idx, group in enumerate(shard_groups):
+ for param_idx, _ in enumerate(group):
+ cpu_buffer = cpu_buffers[group_idx][param_idx]
+ if is_allocate_stage:
+ shard_groups[group_idx][param_idx].untyped_storage().resize_(
+ cpu_buffer.untyped_storage().size()
+ )
+ else:
+ shard_groups[group_idx][param_idx].copy_(
+ cpu_buffer, non_blocking=cpu_buffer.is_pinned()
+ )
+
+ def _reload_states(self, is_allocate_stage: bool):
+ """
+ Reload optimizer states and/or master weights from CPU to GPU.
+
+ If is_allocate_stage is True, only allocate GPU memory for the states and master weights,
+ but do not copy the data from CPU to GPU. Otherwise, copy the data from CPU to GPU.
+ The two processes are separated to make sure that the GPU memory is allocated on the
+ default stream to avoid fragmentation.
+ """
+ # Reload states to adam optimizer
+ states = self.adam_optimizer.state
+
+ for param, param_state in states.items():
+ if param not in self._opt_state_cpu_buffers:
+ continue
+
+ for state_key in self._offloaded_state_keys:
+ if state_key not in self._opt_state_cpu_buffers[param]:
+ continue
+
+ cpu_buffer = self._opt_state_cpu_buffers[param][state_key]
+ if is_allocate_stage:
+ param_state[state_key].untyped_storage().resize_(
+ cpu_buffer.untyped_storage().size()
+ )
+ else:
+ param_state[state_key].copy_(cpu_buffer, non_blocking=cpu_buffer.is_pinned())
+
+ # Reload mcore master weights if not in optimizer state
+ if self._offloaded_mcore_master_weights:
+ self._reload_shard_groups(
+ self.dist_optimizer.shard_fp32_from_float16_groups,
+ self._shard_fp32_from_float16_cpu_buffers,
+ is_allocate_stage,
+ )
+
+ def offload(self, offload_optimizer_states: bool = True, offload_master_weights: bool = True):
+ """
+ Offload optimizer states and/or master weights to CPU.
+ Starts async D2H transfer that can overlap with other operations.
+
+ Args:
+ offload_optimizer_states: Whether to offload exp_avg, exp_avg_sq.
+ offload_master_weights: Whether to offload master weights.
+ """
+ if not offload_optimizer_states and not offload_master_weights:
+ return
+
+ # Wait for current stream finishing updating the optimizer states.
+ self._d2h_stream.wait_stream(torch.cuda.current_stream())
+
+ with torch.cuda.stream(self._d2h_stream):
+ self._offload_states(offload_optimizer_states, offload_master_weights)
+
+ self._offloaded = True
+
+ def release_gpu_memory(self):
+ """
+ Release GPU memory for optimizer states and master weights after D2H copy completes.
+
+ This is separated from offload() to allow delayed GPU memory release,
+ which is needed for mxfp8 + overlap_param_gather case where master weights
+ must remain on GPU until after _copy_main_params_to_param_buffer() is called.
+ """
+ if not self._offloaded:
+ return
+
+ self._release_states()
+
+ def reload(self):
+ """
+ Reload optimizer states and/or master weights from CPU to GPU.
+ Call before optimizer.step() to ensure states are on GPU.
+ """
+ if not self._offloaded:
+ return
+
+ # Allocate GPU memory on the current stream to avoid fragmentation.
+ self._reload_states(is_allocate_stage=True)
+
+ self._h2d_stream.wait_stream(self._d2h_stream)
+ self._h2d_stream.wait_stream(torch.cuda.current_stream())
+
+ # Reload states on the h2d stream to overlap with other operations.
+ with torch.cuda.stream(self._h2d_stream):
+ self._reload_states(is_allocate_stage=False)
+
+ self._offloaded_state_keys = ()
+ self._offloaded_mcore_master_weights = False
+ self._offloaded = False
+
+ def sync_before_step(self):
+ """
+ Wait for H2D reload to complete before optimizer.step().
+ Must be called to ensure states are on GPU before optimizer uses them.
+
+ This is separated from reload() to make it possible to move the reload ahead of time.
+ """
+ torch.cuda.current_stream().wait_stream(self._h2d_stream)
diff --git a/megatron/core/optimizer/distrib_optimizer.py b/megatron/core/optimizer/distrib_optimizer.py
index eeda383a75d..beb00391759 100644
--- a/megatron/core/optimizer/distrib_optimizer.py
+++ b/megatron/core/optimizer/distrib_optimizer.py
@@ -52,6 +52,7 @@
from ..fp8_utils import dequantize_fp8_tensor, is_float8tensor, quantize_param_shard
from ..transformer.fsdp_dtensor_checkpoint import handle_experts_in_state_dict
from ..transformer.module import MegatronModule
+from .cpu_offloading.optimizer_state_offloader import OptimizerStateOffloader
from .grad_scaler import MegatronGradScaler
from .optimizer import MixedPrecisionOptimizer, _zero_grad_group_helper, param_group_identifier_keys
from .optimizer_config import OptimizerConfig
@@ -361,7 +362,10 @@ def _build_model_and_main_param_groups(
if model_param.type() in ['torch.cuda.HalfTensor', 'torch.cuda.BFloat16Tensor']:
# Generate sharded model param.
- if is_float8tensor(model_param) and config.fp8_recipe != "delayed":
+ if (
+ cls._is_distopt_quantized_param(model_param)
+ and config.fp8_recipe != "delayed"
+ ):
# MXFP8Tensor and BlockwiseQTensor don't support view(-1)
shard_model_param = None
else:
@@ -381,7 +385,7 @@ def _build_model_and_main_param_groups(
# precision at the beginning of training (this problem will not occur if the
# training is long enough or if the main params are loaded from a
# checkpoint).
- if is_float8tensor(model_param):
+ if cls._is_distopt_quantized_param(model_param):
if hasattr(model_param, 'get_high_precision_init_val'):
shard_main_param = (
model_param.get_high_precision_init_val()
@@ -519,6 +523,8 @@ def __init__(
"due to checkpointing requirements."
)
+ self._state_offloader: Optional[OptimizerStateOffloader] = None
+
# when freezing sub-models we have no real optimizer
# but still need a stub DistributedOptimizer class
if optimizer is None:
@@ -607,6 +613,9 @@ def __init__(
self.optimizer.param_groups = [g["orig_group"] for g in self.opt_group_ranges]
self.optimizer.load_state_dict(self.optimizer.state_dict())
+ if self.config.offload_optimizer_states:
+ self._state_offloader = OptimizerStateOffloader(self)
+
def _get_model_param_range_map(self, param: torch.nn.Parameter):
"""
Given a model param, get the index sub-range of the param that this
@@ -913,6 +922,70 @@ def _get_main_param_and_optimizer_states(self, model_param):
tensors[k] = v
return tensors
+ @staticmethod
+ def _is_grouped_quantized_tensor(tensor: torch.Tensor) -> bool:
+ """Check if tensor is a TE GroupedTensor using quantized storage."""
+ return (
+ hasattr(tensor, "split_into_quantized_tensors")
+ and callable(tensor.split_into_quantized_tensors)
+ and getattr(tensor, "quantizer", None) is not None
+ )
+
+ @classmethod
+ def _is_distopt_quantized_param(cls, tensor: torch.Tensor) -> bool:
+ """Check if tensor should follow quantized parameter path in dist optimizer."""
+ return is_float8tensor(tensor) or cls._is_grouped_quantized_tensor(tensor)
+
+ def _expand_quantized_param_shard_for_cast(
+ self,
+ model_param: torch.Tensor,
+ shard_main_param: Optional[torch.Tensor],
+ start_offset: Optional[int],
+ ):
+ """Expand one quantized model param to cast-ready entries.
+
+ For grouped quantized tensors, split into member quantized tensors and map the sharded
+ master slice to per-member offset ranges, while preserving deterministic ordering across
+ DP ranks.
+ """
+ if not self._is_grouped_quantized_tensor(model_param):
+ return [model_param], [shard_main_param], [start_offset]
+
+ quantized_members = model_param.quantized_tensors
+ if quantized_members is None:
+ quantized_members = model_param.split_into_quantized_tensors()
+
+ shard_start = 0 if start_offset is None else start_offset
+ shard_size = 0 if shard_main_param is None else shard_main_param.numel()
+ shard_end = shard_start + shard_size
+ shard_flat = None if shard_main_param is None else shard_main_param.view(-1)
+
+ expanded_model_params = []
+ expanded_shard_main_params = []
+ expanded_start_offsets = []
+ member_offset = 0
+ for member in quantized_members:
+ member_numel = member.numel()
+ member_start = member_offset
+ member_end = member_start + member_numel
+ overlap_start = max(member_start, shard_start)
+ overlap_end = min(member_end, shard_end)
+
+ member_master = None
+ member_start_offset = None
+ if overlap_start < overlap_end:
+ local_start = overlap_start - shard_start
+ local_end = overlap_end - shard_start
+ member_master = shard_flat[local_start:local_end]
+ member_start_offset = overlap_start - member_start
+
+ expanded_model_params.append(member)
+ expanded_shard_main_params.append(member_master)
+ expanded_start_offsets.append(member_start_offset)
+ member_offset = member_end
+
+ return expanded_model_params, expanded_shard_main_params, expanded_start_offsets
+
def _set_main_param_and_optimizer_states(self, model_param, tensors):
"""Set the main param and optimizer states corresponding to the input model_param.
@@ -2145,7 +2218,7 @@ def split_state_dict_if_needed(self, state_dict):
fp8_gbuf_indices = []
for gbuf_idx, gbuf_range_maps in enumerate(self.gbuf_ranges):
for dtype, _ in gbuf_range_maps.items():
- if is_float8tensor(self.buffers[gbuf_idx].params[0]):
+ if self._is_distopt_quantized_param(self.buffers[gbuf_idx].params[0]):
fp8_gbuf_indices.append(gbuf_idx)
if len(fp8_gbuf_indices) == 0:
return
@@ -2167,7 +2240,7 @@ def split_state_dict_if_needed(self, state_dict):
new_state_dict = {'buckets_coalesced': state_dict['buckets_coalesced']}
for gbuf_idx, gbuf_range_maps in enumerate(self.gbuf_ranges):
for dtype, _ in gbuf_range_maps.items():
- if not is_float8tensor(self.buffers[gbuf_idx].params[0]):
+ if not self._is_distopt_quantized_param(self.buffers[gbuf_idx].params[0]):
new_state_dict[gbuf_idx] = state_dict[dtype_to_gbuf_idx[dtype]]
for fp8_gbuf_idx in fp8_gbuf_indices:
@@ -2367,7 +2440,7 @@ def _get_fp8_params_and_shard_fp32_from_fp8(self):
idx = 0
for buffer in buffers:
for param in buffer.params:
- if is_float8tensor(param):
+ if self._is_distopt_quantized_param(param):
fp8_params.append(param)
shard_fp32_from_fp8.append(None)
shard_offsets_in_fp8.append(None)
@@ -2382,7 +2455,7 @@ def get_shard_fp32_from_fp8(shard_main_groups, model_groups):
"""
for shard_main_group, model_group in zip(shard_main_groups, model_groups):
for shard_main_param, model_param in zip(shard_main_group, model_group):
- if is_float8tensor(model_param):
+ if self._is_distopt_quantized_param(model_param):
param_range_map = self._get_model_param_range_map(model_param)
param_range = param_range_map["param"]
assert param_range.size == shard_main_param.nelement()
@@ -2459,8 +2532,29 @@ def _copy_main_params_to_model_params(self):
if self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8:
return
+ fp8_params, shard_fp32_from_fp8, shard_offsets_in_fp8 = (
+ self._get_fp8_params_and_shard_fp32_from_fp8()
+ )
+ expanded_fp8_params = []
+ expanded_shard_fp32_from_fp8 = []
+ expanded_shard_offsets_in_fp8 = []
+ for model_param, shard_main_param, start_offset in zip(
+ fp8_params, shard_fp32_from_fp8, shard_offsets_in_fp8
+ ):
+ sub_model_params, sub_shard_main_params, sub_start_offsets = (
+ self._expand_quantized_param_shard_for_cast(
+ model_param, shard_main_param, start_offset
+ )
+ )
+ expanded_fp8_params.extend(sub_model_params)
+ expanded_shard_fp32_from_fp8.extend(sub_shard_main_params)
+ expanded_shard_offsets_in_fp8.extend(sub_start_offsets)
+
quantize_param_shard(
- *self._get_fp8_params_and_shard_fp32_from_fp8(), self.data_parallel_group
+ expanded_fp8_params,
+ expanded_shard_fp32_from_fp8,
+ expanded_shard_offsets_in_fp8,
+ self.data_parallel_group,
)
# Utility method for copying group params.
@@ -2480,7 +2574,7 @@ def copy_group_params(shard_main_groups, model_groups):
world_range.start : world_range.end
]
- if is_float8tensor(model_param):
+ if self._is_distopt_quantized_param(model_param):
# FP8 params are quantized in the above "quantize_param_shard" function.
continue
else:
@@ -2592,8 +2686,12 @@ def copy_group_params(model_groups, shard_main_groups):
# Use param from state_dict to initialize main_param
model_param = model_param_to_state_dict_param_map[model_param]
- if is_float8tensor(model_param):
- shard_model_param = dequantize_fp8_tensor(model_param).view(-1)[
+ if self._is_distopt_quantized_param(model_param):
+ if self._is_grouped_quantized_tensor(model_param):
+ dequantized_model_param = model_param.float()
+ else:
+ dequantized_model_param = dequantize_fp8_tensor(model_param)
+ shard_model_param = dequantized_model_param.view(-1)[
param_range.start : param_range.end
]
else:
@@ -2612,6 +2710,8 @@ def step_with_ready_grads(self) -> bool:
Under the hood, either launch synchronous param all-gathers or get ready to launch
asynchorous all-gathers that get overlapped with the next forward pass.
"""
+ if self._state_offloader is not None:
+ self._state_offloader.sync_before_step()
update_successful = super().step_with_ready_grads()
timers = self.config.timers
@@ -2632,4 +2732,22 @@ def step_with_ready_grads(self) -> bool:
if timers is not None:
timers('params-all-gather').stop()
+ if self._state_offloader is not None:
+ self._state_offloader.mark_optimizer_states_initialized()
+
return update_successful
+
+ def offload_states(self):
+ """Offload states to CPU."""
+ if self._state_offloader is not None:
+ self._state_offloader.offload()
+
+ def reload_offloaded_states(self):
+ """Start async reload of offloaded states."""
+ if self._state_offloader is not None:
+ self._state_offloader.reload()
+
+ def release_offloaded_gpu_states(self):
+ """Release GPU memory after D2H completes. For delayed release case."""
+ if self._state_offloader is not None:
+ self._state_offloader.release_gpu_memory()
diff --git a/megatron/core/optimizer/emerging_optimizers.py b/megatron/core/optimizer/emerging_optimizers.py
new file mode 100644
index 00000000000..74a0d0204f3
--- /dev/null
+++ b/megatron/core/optimizer/emerging_optimizers.py
@@ -0,0 +1,378 @@
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+"""Emerging optimizer registry.
+
+To add a new emerging optimizer:
+ 1. Define its optimizer class (or import it).
+ 2. Write its ``__init_state_fn`` and ``__config_to_kwargs``.
+ 3. Add an ``EmergingOptimizerEntry`` to ``_EMERGING_OPTIMIZERS`` at the bottom.
+"""
+
+import inspect
+import logging
+from dataclasses import dataclass, field
+from typing import Any, Callable, Dict, List, Literal, Optional
+
+import torch
+from torch.optim.optimizer import ParamsT
+
+from megatron.core.process_groups_config import ProcessGroupCollection
+from megatron.core.utils import get_pg_size, log_single_rank
+
+from .optimizer_config import ParamKey, ParamPredicate
+
+try:
+ from emerging_optimizers import registry
+ from emerging_optimizers.orthogonalized_optimizers import (
+ AdaptiveMuon,
+ OrthogonalizedOptimizer,
+ get_muon_scale_factor,
+ )
+ from emerging_optimizers.orthogonalized_optimizers.muon_utils import newton_schulz_tp
+ from emerging_optimizers.scalar_optimizers import Lion # pylint: disable=unused-import
+
+ # It is necessary to import optimizers for the registry to work.
+ from emerging_optimizers.soap import SOAP # pylint: disable=unused-import
+
+ HAVE_EMERGING_OPTIMIZERS = True
+except ImportError:
+ HAVE_EMERGING_OPTIMIZERS = False
+ OrthogonalizedOptimizer = object
+ AdaptiveMuon = object
+
+
+logger = logging.getLogger(__name__)
+
+
+# ===========================================================================
+# Registry dataclass and public API
+# ===========================================================================
+
+
+def _eopt_init_state_fn(opt, config=None):
+ """Initialize emerging optimizer state for torch_dist checkpoint format."""
+ for group in opt.param_groups:
+ # Checkpoint init needs state for all parameters, including those without grads yet.
+ opt._init_group(group, skip_non_grad_params=False)
+
+
+def _default_param_overrides_factory() -> Dict[ParamKey, Dict[str, Any]]:
+ """Default param overrides: route non-linear/embedding params to Adam."""
+ return {
+ ParamKey(
+ predicate=ParamPredicate(name="nonlinear_or_embedding", fn=_is_nonlinear_or_embedding)
+ ): {'optimizer': 'adam'}
+ }
+
+
+@dataclass
+class EmergingOptimizerEntry:
+ """Everything needed to create and configure an emerging optimizer.
+
+ Attributes:
+ optimizer_cls: The torch optimizer class.
+ init_state_fn: Lazily initialises optimizer state (needed for checkpoint formats).
+ config_to_kwargs: ``(config, model_chunks, pg_collection) -> dict`` of constructor kwargs.
+ default_param_overrides: Per-parameter config overrides applied automatically
+ (e.g. route non-linear params to Adam).
+ """
+
+ optimizer_cls: type
+ init_state_fn: Callable = _eopt_init_state_fn
+ config_to_kwargs: Callable | None = None
+ default_param_overrides: Dict[ParamKey, Dict[str, Any]] = field(
+ default_factory=_default_param_overrides_factory
+ )
+
+
+def _create_emerging_optimizer(config, param_groups, eopt_name, model_chunks, pg_collection):
+ """Instantiate an emerging optimizer and return it with its init_state_fn."""
+ entry = _EMERGING_OPTIMIZERS[eopt_name]
+ if entry.config_to_kwargs is not None:
+ eopt_kwargs = entry.config_to_kwargs(config, model_chunks, pg_collection)
+ else:
+ eopt_kwargs = _default_adam_based_eopt_config_to_kwargs(
+ eopt_name, config, model_chunks, pg_collection
+ )
+ optimizer = entry.optimizer_cls(param_groups, **eopt_kwargs)
+ return optimizer, entry.init_state_fn
+
+
+# ===========================================================================
+# Shared helpers
+# ===========================================================================
+
+
+def _is_nonlinear_or_embedding(param):
+ """True for parameters that should NOT use the emerging optimizer."""
+ return getattr(param, 'is_embedding_or_output_parameter', False) or len(param.shape) != 2
+
+
+def _get_qkv_split_shapes(model_cfg) -> List[int]:
+ """Compute QKV split shapes from model config."""
+ return [
+ model_cfg.num_attention_heads // model_cfg.num_query_groups * model_cfg.kv_channels,
+ model_cfg.kv_channels,
+ model_cfg.kv_channels,
+ ]
+
+
+# ===========================================================================
+# Registry β populated below only when emerging_optimizers is installed.
+# ===========================================================================
+
+
+# ===========================================================================
+# Muon
+# ===========================================================================
+
+
+class TensorParallelMuon(OrthogonalizedOptimizer):
+ """Tensor Parallel Muon optimizer."""
+
+ def __init__(
+ self,
+ params: ParamsT,
+ lr: float = 3e-4,
+ momentum: float = 0.95,
+ nesterov: bool = True,
+ weight_decay: float = 0.01,
+ use_decoupled_weight_decay: bool = True,
+ split_qkv: bool = False,
+ is_qkv_fn: Callable[[torch.Tensor], bool] | None = None,
+ qkv_split_shapes: tuple[int, int, int] | None = None,
+ fp32_matmul_prec: str = "medium",
+ coefficient_type: str = "quintic",
+ num_ns_steps: int = 5,
+ scale_mode: str = "spectral",
+ extra_scale_factor: float = 1.0,
+ pg_collection: Optional[ProcessGroupCollection] = None,
+ tp_mode: Literal["blockwise", "duplicated", "distributed"] = "duplicated",
+ ) -> None:
+ if num_ns_steps < 1:
+ raise ValueError(f"num_ns_steps must be at least 1, got {num_ns_steps}")
+
+ def scaled_orthogonalize_fn(
+ grad: torch.Tensor,
+ tp_group: torch.distributed.ProcessGroup,
+ partition_dim: int | None = None,
+ ) -> torch.Tensor:
+ log_single_rank(
+ logger,
+ logging.DEBUG,
+ f'Orthogonalizing grad with {num_ns_steps} steps, '
+ f'{coefficient_type} coefficient, '
+ f'{scale_mode} scale mode, extra_scale_factor={extra_scale_factor}',
+ )
+ size = [grad.size(-2), grad.size(-1)]
+ if partition_dim is not None:
+ size[partition_dim] *= get_pg_size(tp_group)
+ orth_grad = newton_schulz_tp(
+ grad,
+ steps=num_ns_steps,
+ coefficient_type=coefficient_type,
+ tp_group=tp_group,
+ partition_dim=partition_dim,
+ tp_mode="duplicated" if tp_mode == "blockwise" else tp_mode,
+ )
+ scale_factor = get_muon_scale_factor(size[0], size[1], mode=scale_mode)
+ return orth_grad * scale_factor * extra_scale_factor
+
+ self.pg_collection = pg_collection
+ self.tp_mode = tp_mode
+ self.split_qkv = split_qkv
+ self.is_qkv_fn = is_qkv_fn
+ self.qkv_split_shapes = qkv_split_shapes
+
+ weight_decay_method = "decoupled" if use_decoupled_weight_decay else "l2"
+ # Use explicit class call instead of super() so that subclasses with
+ # multiple inheritance (e.g. TensorParallelAdaptiveMuon) don't route
+ # through an intermediate class that doesn't accept scaled_orthogonalize_fn.
+ OrthogonalizedOptimizer.__init__(
+ self,
+ params,
+ lr,
+ momentum,
+ nesterov=nesterov,
+ weight_decay=weight_decay,
+ weight_decay_method=weight_decay_method,
+ fp32_matmul_prec=fp32_matmul_prec,
+ scaled_orthogonalize_fn=scaled_orthogonalize_fn,
+ )
+
+ def orthogonalize(self, p: torch.Tensor, grad: torch.Tensor, **kwargs: Any) -> torch.Tensor:
+ """Orthogonalize the momentum.
+
+ Args:
+ p: The parameter tensor. It is necessary to pass the param tensor in addition to
+ momentum because a lot of information is only available in the param tensor,
+ attributes for example.
+ grad: The momentum tensor.
+
+ Returns:
+ The orthogonalized gradient tensor.
+ """
+ # TODO(deyuf): switch to group
+ if self.pg_collection:
+ tp_group = (
+ self.pg_collection.expt_tp
+ if getattr(p, 'expert_tp', False)
+ else self.pg_collection.tp
+ )
+ else:
+ tp_group = None
+ partition_dim = None if self.tp_mode == "blockwise" else getattr(p, "partition_dim", None)
+ if partition_dim == -1:
+ partition_dim = None
+
+ if self.split_qkv and self.is_qkv_fn(p): # type: ignore[misc]
+ grad_shape = grad.shape
+ log_single_rank(
+ logger,
+ logging.DEBUG,
+ f'qkv split grad shape {grad_shape}, ' f'split shapes {self.qkv_split_shapes}',
+ )
+ num_query_groups = grad_shape[0] // sum(self.qkv_split_shapes)
+ qkv_grads = torch.split(
+ grad.view(num_query_groups, sum(self.qkv_split_shapes), -1),
+ self.qkv_split_shapes,
+ dim=1,
+ )
+ qkv_grads = [g.reshape(-1, grad_shape[-1]) for g in qkv_grads]
+
+ qkv_grads = [
+ self.scaled_orthogonalize_fn(g, tp_group, partition_dim).view(
+ num_query_groups, -1, grad_shape[-1]
+ )
+ for g in qkv_grads
+ ]
+ grad = torch.cat(qkv_grads, dim=1).view(grad_shape)
+ else:
+ grad = self.scaled_orthogonalize_fn(grad, tp_group, partition_dim)
+ return grad
+
+
+class TensorParallelAdaptiveMuon(TensorParallelMuon, AdaptiveMuon):
+ """Tensor Parallel Adaptive Muon optimizer."""
+
+ def __init__(
+ self,
+ params: ParamsT,
+ lr: float = 3e-4,
+ momentum: float = 0.95,
+ nesterov: bool = True,
+ weight_decay: float = 0.01,
+ use_decoupled_weight_decay: bool = True,
+ split_qkv: bool = False,
+ is_qkv_fn: Callable[[torch.Tensor], bool] | None = None,
+ qkv_split_shapes: tuple[int, int, int] | None = None,
+ fp32_matmul_prec: str = "medium",
+ coefficient_type: str = "quintic",
+ num_ns_steps: int = 5,
+ scale_mode: str = "spectral",
+ extra_scale_factor: float = 1.0,
+ pg_collection: Optional[ProcessGroupCollection] = None,
+ tp_mode: Literal["blockwise", "duplicated", "distributed"] = "duplicated",
+ moment2_method: Literal["adamuon", "normuon"] = "adamuon",
+ beta2: float = 0.95,
+ eps: float = 1e-8,
+ ) -> None:
+ TensorParallelMuon.__init__(
+ self,
+ params,
+ lr=lr,
+ momentum=momentum,
+ nesterov=nesterov,
+ weight_decay=weight_decay,
+ use_decoupled_weight_decay=use_decoupled_weight_decay,
+ split_qkv=split_qkv,
+ is_qkv_fn=is_qkv_fn,
+ qkv_split_shapes=qkv_split_shapes,
+ fp32_matmul_prec=fp32_matmul_prec,
+ coefficient_type=coefficient_type,
+ num_ns_steps=num_ns_steps,
+ scale_mode=scale_mode,
+ extra_scale_factor=extra_scale_factor,
+ pg_collection=pg_collection,
+ tp_mode=tp_mode,
+ )
+ self.moment2_method = moment2_method
+
+ for group in self.param_groups:
+ group.setdefault("beta2", beta2)
+ group.setdefault("eps", eps)
+
+ @torch.no_grad() # type: ignore[misc]
+ def step(self, closure: Optional[Callable] = None) -> Optional[float]:
+ """Step function"""
+ return AdaptiveMuon.step(self, closure)
+
+
+def _kwargs_from_config(optimizer_cls: type, prefix: str, config) -> Dict[str, Any]:
+ """Match ``optimizer_cls.__init__`` parameters to config attributes.
+
+ For each init parameter, looks for ``{prefix}_{name}`` on *config* first,
+ then falls back to ``{name}`` (unprefixed). ``self`` and ``params`` are
+ always skipped.
+ """
+ skip_params = {"self", "params"}
+ sig = inspect.signature(optimizer_cls.__init__)
+ kwargs: Dict[str, Any] = {}
+ for name in sig.parameters:
+ if name in skip_params:
+ continue
+ prefixed = f"{prefix}_{name}"
+ if hasattr(config, prefixed):
+ kwargs[name] = getattr(config, prefixed)
+ elif hasattr(config, name):
+ kwargs[name] = getattr(config, name)
+ return kwargs
+
+
+def _muon_config_to_kwargs(config, model_chunks, pg_collection) -> Dict[str, Any]:
+ """Convert OptimizerConfig to TensorParallelMuon constructor kwargs."""
+ kwargs = _kwargs_from_config(TensorParallelMuon, "muon", config)
+ kwargs["is_qkv_fn"] = lambda p: getattr(p, "is_qkv", False)
+ kwargs["qkv_split_shapes"] = _get_qkv_split_shapes(model_chunks[0].config)
+ kwargs["pg_collection"] = pg_collection
+ return kwargs
+
+
+def _adaptive_muon_config_to_kwargs(config, model_chunks, pg_collection) -> Dict[str, Any]:
+ """Convert OptimizerConfig to TensorParallelAdaptiveMuon constructor kwargs."""
+ kwargs = _muon_config_to_kwargs(config, model_chunks, pg_collection)
+ kwargs.update(_kwargs_from_config(TensorParallelAdaptiveMuon, "adaptive_muon", config))
+ return kwargs
+
+
+def _default_adam_based_eopt_config_to_kwargs(
+ eopt_name, config, model_chunks, pg_collection
+) -> Dict[str, Any]:
+ """Convert OptimizerConfig to default emerging optimizer constructor kwargs."""
+ kwargs = _kwargs_from_config(registry.get_optimizer_cls(eopt_name), eopt_name, config)
+ kwargs["betas"] = (config.adam_beta1, config.adam_beta2)
+ return kwargs
+
+
+# -----------------------------------------------------------------------
+# Register emerging optimizers
+# -----------------------------------------------------------------------
+_EMERGING_OPTIMIZERS = {
+ 'muon': EmergingOptimizerEntry(
+ optimizer_cls=TensorParallelMuon, config_to_kwargs=_muon_config_to_kwargs
+ ),
+ "adaptive_muon": EmergingOptimizerEntry(
+ optimizer_cls=TensorParallelAdaptiveMuon, config_to_kwargs=_adaptive_muon_config_to_kwargs
+ ),
+}
+
+# Register soap with default config
+# TODO(skyw): register all emerging optimizers.
+if HAVE_EMERGING_OPTIMIZERS:
+ for eopt_name in registry.get_optimizer_name_list():
+ if eopt_name in _EMERGING_OPTIMIZERS:
+ # skip already registered local versions, e.g. TensorParallel versions.
+ continue
+ _EMERGING_OPTIMIZERS[eopt_name] = EmergingOptimizerEntry(
+ optimizer_cls=registry.get_optimizer_cls(eopt_name)
+ )
diff --git a/megatron/core/optimizer/layer_wise_optimizer.py b/megatron/core/optimizer/layer_wise_optimizer.py
index a9fdc7ba72f..6e0f32ab357 100644
--- a/megatron/core/optimizer/layer_wise_optimizer.py
+++ b/megatron/core/optimizer/layer_wise_optimizer.py
@@ -46,7 +46,6 @@ def __init__(
pg_collection: Optional[ProcessGroupCollection] = None,
init_state_fn_list: Optional[List[Callable]] = None,
model_chunks: Optional[List] = None,
- async_allgather: bool = False,
) -> None:
"""
Initialize LayerWiseDistributedOptimizer.
@@ -57,14 +56,13 @@ def __init__(
pg_collection: ProcessGroupCollection.
init_state_fn_list: List of init state functions.
model_chunks: DDP-wrapped model chunks (needed for async_allgather).
- async_allgather: If True, defer param all-gather to forward pre-hooks.
"""
self.pg_collection = pg_collection
self.shard_params(optimizers)
# Set up async all-gather using DDP bucket infrastructure.
- self.async_allgather = async_allgather
+ self.async_allgather = config.overlap_param_gather
if self.async_allgather:
assert (
model_chunks is not None
@@ -76,19 +74,17 @@ def __init__(
optimizers
), "init_state_fn_list must be the same length as optimizers if provided"
- # wrap optimizer after sharding to avoid unnecessary master weight creation
- # for higher precision, optimizers are wrapped with megatron already
+ # Wrap base torch optimizers with Float16 for bf16 training.
+ # Callers pass base optimizers; wrapping happens here *after*
+ # shard_params so master weights are only created for the local shard.
if config.bf16:
- # unwrap FP32 optimizer, possibly from reusing get_megatron_optimizer for adam
for i in range(len(optimizers)):
opt = optimizers[i]
- if isinstance(opt, Float16OptimizerWithFloat16Params):
+ if isinstance(opt, (Float16OptimizerWithFloat16Params, FP32Optimizer)):
raise TypeError(
- 'LayerWiseDistributedOptimizer received Float16 optimizer already.'
+ 'LayerWiseDistributedOptimizer expects base torch optimizers, '
+ f'got {type(opt).__name__}. Do not pre-wrap with Megatron optimizers.'
)
- # unwrap FP32 optimizer from reusing get_megatron_optimizer for adam
- if isinstance(opt, FP32Optimizer):
- opt = opt.optimizer
optimizers[i] = Float16OptimizerWithFloat16Params(
opt, config, None, init_state_fn_list[i] if init_state_fn_list else None
)
diff --git a/megatron/core/optimizer/muon.py b/megatron/core/optimizer/muon.py
index ae4a1a348fa..329ce60dd1f 100644
--- a/megatron/core/optimizer/muon.py
+++ b/megatron/core/optimizer/muon.py
@@ -1,39 +1,8 @@
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-"""Megatron muon optimizer wrapper to handle tensor-parallel."""
+"""Backward-compatible shim β all code now lives in ``emerging_optimizers``."""
-import logging
-from typing import Any, Callable, Dict, List, Literal, Optional
-
-import torch
-from torch.optim.optimizer import ParamsT
-
-from megatron.core.optimizer_param_scheduler import ParamGroupOverride
-from megatron.core.process_groups_config import ProcessGroupCollection
-from megatron.core.transformer.module import MegatronModule
-from megatron.core.utils import get_pg_size, log_single_rank
-
-from . import _get_param_groups, get_megatron_optimizer
-from .layer_wise_optimizer import LayerWiseDistributedOptimizer
-from .optimizer import (
- ChainedOptimizer,
- Float16OptimizerWithFloat16Params,
- FP32Optimizer,
- MegatronOptimizer,
-)
-from .optimizer_config import OptimizerConfig, ParamKey
-
-try:
- from emerging_optimizers.orthogonalized_optimizers import (
- OrthogonalizedOptimizer,
- get_muon_scale_factor,
- )
- from emerging_optimizers.orthogonalized_optimizers.muon_utils import newton_schulz_tp
-
- HAVE_EMERGING_OPTIMIZERS = True
-except ImportError:
- HAVE_EMERGING_OPTIMIZERS = False
- OrthogonalizedOptimizer = object
+from typing import Any
# TODO: Remove this separate try/except once the next version of emerging_optimizers
# (which includes Lion) is released. Then Lion can be imported in the block above.
@@ -45,338 +14,17 @@
HAVE_LION = False
-logger = logging.getLogger(__name__)
-
-
-class TensorParallelMuon(OrthogonalizedOptimizer):
- """Tensor Parallel Muon optimizer."""
-
- def __init__(
- self,
- params: ParamsT,
- lr: float = 3e-4,
- momentum_beta: float = 0.95,
- use_nesterov: bool = True,
- weight_decay: float = 0.01,
- use_decoupled_weight_decay: bool = True,
- split_qkv: bool = False,
- is_qkv_fn: Callable[[torch.Tensor], bool] | None = None,
- qkv_split_shapes: tuple[int, int, int] | None = None,
- fp32_matmul_prec: str = "medium",
- coefficient_type: str = "quintic",
- num_ns_steps: int = 5,
- scale_mode: str = "spectral",
- extra_scale_factor: float = 1.0,
- pg_collection: Optional[ProcessGroupCollection] = None,
- mode: Literal["blockwise", "duplicated", "distributed"] = "duplicated",
- ) -> None:
- if num_ns_steps < 1:
- raise ValueError(f"num_ns_steps must be at least 1, got {num_ns_steps}")
-
- def scaled_orthogonalize_fn(
- grad: torch.Tensor,
- tp_group: torch.distributed.ProcessGroup,
- partition_dim: int | None = None,
- ) -> torch.Tensor:
- log_single_rank(
- logger,
- logging.DEBUG,
- f'Orthogonalizing grad with {num_ns_steps} steps, {coefficient_type} coefficient, '
- f'{scale_mode} scale mode, extra_scale_factor={extra_scale_factor}',
- )
- size = [grad.size(-2), grad.size(-1)]
- if partition_dim is not None:
- size[partition_dim] *= get_pg_size(tp_group)
- orth_grad = newton_schulz_tp(
- grad,
- steps=num_ns_steps,
- coefficient_type=coefficient_type,
- tp_group=tp_group,
- partition_dim=partition_dim,
- mode="duplicated" if mode == "blockwise" else mode,
- )
- scale_factor = get_muon_scale_factor(size[0], size[1], mode=scale_mode)
- return orth_grad * scale_factor * extra_scale_factor
-
- self.pg_collection = pg_collection
- self.mode = mode
- self.split_qkv = split_qkv
- self.is_qkv_fn = is_qkv_fn
- self.qkv_split_shapes = qkv_split_shapes
-
- weight_decay_method = "decoupled" if use_decoupled_weight_decay else "l2"
- super().__init__(
- params,
- lr,
- momentum_beta,
- use_nesterov=use_nesterov,
- weight_decay=weight_decay,
- weight_decay_method=weight_decay_method,
- fp32_matmul_prec=fp32_matmul_prec,
- scaled_orthogonalize_fn=scaled_orthogonalize_fn,
- )
-
- def orthogonalize(self, p: torch.Tensor, grad: torch.Tensor, **kwargs: Any) -> torch.Tensor:
- """Orthogonalize the momentum.
-
- Args:
- p: The parameter tensor. i is necessary to pass param tensor in addition to momentum
- because a lot of information is only available in the param tensor,
- attributes for example.
- grad: The momentum tensor.
-
- Returns:
- The orthogonalized gradient tensor.
- """
- # TODO(deyuf): switch to group
- if self.pg_collection:
- tp_group = (
- self.pg_collection.expt_tp
- if getattr(p, 'expert_tp', False)
- else self.pg_collection.tp
- )
- else:
- tp_group = None
- partition_dim = None if self.mode == "blockwise" else getattr(p, "partition_dim", None)
- if partition_dim == -1:
- # emerging-optimizers use None instead of -1 to indicate no tensor parallel
- partition_dim = None
-
- if self.split_qkv and self.is_qkv_fn(p): # type: ignore[misc]
- # split grouped attention parameters (e.g., QKV, GQA, etc.)
- grad_shape = grad.shape
- log_single_rank(
- logger,
- logging.DEBUG,
- f'qkv split grad shape {grad_shape}, split shapes {self.qkv_split_shapes}',
- )
- num_query_groups = grad_shape[0] // sum(self.qkv_split_shapes)
- qkv_grads = torch.split(
- grad.view(num_query_groups, sum(self.qkv_split_shapes), -1),
- self.qkv_split_shapes,
- dim=1,
- )
- qkv_grads = [g.reshape(-1, grad_shape[-1]) for g in qkv_grads]
-
- # Apply Newton-Schulz and scales to each component, concat back
- qkv_grads = [
- self.scaled_orthogonalize_fn(g, tp_group, partition_dim).view(
- num_query_groups, -1, grad_shape[-1]
- )
- for g in qkv_grads
- ]
- grad = torch.cat(qkv_grads, dim=1).view(grad_shape)
- else:
- grad = self.scaled_orthogonalize_fn(grad, tp_group, partition_dim)
- return grad
-
-
-def get_megatron_muon_optimizer(
- config: OptimizerConfig,
- model_chunks: List[MegatronModule],
- config_overrides: Optional[Dict[ParamKey, ParamGroupOverride]] = None,
- use_gloo_process_groups: bool = True,
- layer_wise_distributed_optimizer: bool = False,
- pg_collection: Optional[ProcessGroupCollection] = None,
-) -> MegatronOptimizer:
- """This function is used to get the muon optimizer for the model chunks.
- It is used to get the muon optimizer for the model chunks.
+def get_megatron_muon_optimizer(*args: Any, **kwargs: Any) -> Any:
+ """Backward compatible muon optimizer getter.
- Args:
- config (OptimizerConfig): optimizer configuration object.
- model_chunks (List[MegatronModule]): model chunks to get optimizer for.
- use_gloo_process_groups (bool): if false, disable use of Gloo process groups
- in underlying Megatron optimizers.
- layer_wise_distributed_optimizer (bool): if true, use layer-wise distributed optimizer.
- Defaults to False.
+ .. deprecated::
+ Use :func:`megatron.core.optimizer.get_megatron_optimizer` instead.
"""
- # TODO: Mutating config.optimizer is a side effect; clean up after
- # https://github.com/NVIDIA/Megatron-LM/pull/3638 lands.
- # Set the nonlinear optimizer for muon (used for embeddings, biases, norms).
- config.optimizer = config.muon_scalar_optimizer
-
- assert HAVE_EMERGING_OPTIMIZERS, "Emerging Optimizers is not installed."
- if config.muon_scalar_optimizer == 'lion':
- assert HAVE_LION, (
- "Lion optimizer requires a version of 'emerging_optimizers' that includes Lion. "
- "Please upgrade to use --muon-scalar-optimizer lion."
- )
-
- # Dist-opt is not supported due to strong coupling with how DDP init grad buffer
- # In theory we can change DDP to enable use muon and dist-opt-adam together
- if config.use_distributed_optimizer:
- raise Exception('muon with dist optimizer is not supported.')
- # only support bf16 w/o loss scale now
- if config.fp16:
- raise Exception('muon with fp16 is not supported.')
-
- # before this function receive properly created collection
- if pg_collection is None:
- pg_collection = ProcessGroupCollection.use_mpu_process_groups()
-
- log_single_rank(logger, logging.INFO, f'Setting up emerging optimizer with config {config}')
-
- # Needed for torch_dist ckpt_format, unlike torch ckpt_format
- # For other emerging optimizers, need to implement init_state_fn as well
- # TODO(boxiangw): Improve usability after optimizer refactor
- # TODO(boxiangw): support precision aware optimizer
- def muon_init_state_fn(opt, config=None):
- for group in opt.param_groups:
- for p in group['params']:
- if len(opt.state[p]) == 0:
- opt.state[p]['momentum_buffer'] = torch.zeros_like(p.data)
-
- def adam_init_state_fn(opt, config=None):
- for group in opt.param_groups:
- for p in group['params']:
- if len(opt.state[p]) == 0:
- if config is None or not config.use_precision_aware_optimizer:
- opt.state[p]['exp_avg'] = torch.zeros_like(p.data)
- opt.state[p]['exp_avg_sq'] = torch.zeros_like(p.data)
- else:
- opt.initialize_state(p)
-
- def lion_init_state_fn(opt, config=None):
- for group in opt.param_groups:
- for p in group['params']:
- if len(opt.state[p]) == 0:
- opt.state[p]['exp_avg'] = torch.zeros_like(p.data)
-
- nonlinear_init_state_fn = (
- lion_init_state_fn if config.muon_scalar_optimizer == 'lion' else adam_init_state_fn
- )
-
- optimizers = []
- # record list of non/linear params
- linear_params = []
- nonlinear_params = []
- for model_chunk in model_chunks:
- # use config to determine qkv split shapes.
- # no need to check tp since tp splits by head and this is per head(group) dimension
- num_attention_heads = model_chunk.config.num_attention_heads
- num_query_groups = model_chunk.config.num_query_groups
- kv_channels = model_chunk.config.kv_channels
- qkv_split_shapes = [
- num_attention_heads // num_query_groups * kv_channels,
- kv_channels,
- kv_channels,
- ]
- for name, param in model_chunk.named_parameters():
- if not param.requires_grad:
- continue
- # add flag for expert weight so optimizer can figure which tp group it uses
- # alternatively, create new param group and save tp_group. this require more
- # change in optimizer
- if 'experts' in name and 'shared' not in name:
- param.expert_tp = True
- # add flag for qkv parameter
- # TODO(deyuf): support MLA
- if 'linear_qkv.weight' in name and len(param.shape) == 2:
- param.is_qkv = True
- # TODO(deyuf): currently only allow 2D non-embedding weight to avoid breaking
- if (
- not getattr(param, 'is_embedding_or_output_parameter', False)
- and len(param.shape) == 2
- ):
- linear_params.append(param)
- else:
- nonlinear_params.append(param)
-
- muon_kwargs = {
- "lr": config.lr,
- "momentum_beta": config.muon_momentum,
- "use_nesterov": config.muon_use_nesterov,
- "weight_decay": config.weight_decay,
- "fp32_matmul_prec": config.muon_fp32_matmul_prec,
- "num_ns_steps": config.muon_num_ns_steps,
- "scale_mode": config.muon_scale_mode,
- "split_qkv": config.muon_split_qkv,
- "is_qkv_fn": lambda p: getattr(p, "is_qkv", False),
- "qkv_split_shapes": qkv_split_shapes,
- "extra_scale_factor": config.muon_extra_scale_factor,
- "pg_collection": pg_collection,
- "mode": config.muon_tp_mode,
- }
-
- # freezing nonlinear params and get param groups for muon
- for param in nonlinear_params:
- param.requires_grad = False
-
- linear_param_groups = _get_param_groups(model_chunks, config, config_overrides)
- # if layerwise distributed optimizer is not used, need to handle ep params separately
- expert_param_groups = []
- if not layer_wise_distributed_optimizer:
- for group in linear_param_groups:
- if group['is_expert_parallel']:
- expert_param_groups.append(group)
- linear_param_groups.remove(group)
-
- optimizer = TensorParallelMuon(linear_param_groups, **muon_kwargs)
-
- reset_config_bf16 = False
- if config.bf16:
- if layer_wise_distributed_optimizer:
- # creating master weight before layerwise sharding will lead to unnecessary master
- # weight so here we delay master weight creation into layer_wise unset config.bf16
- # will also result in all optimizers below(adam) to also not be wrapped
- config.bf16 = False
- reset_config_bf16 = True
- else:
- # if not using layer_wise wrapper, just create master weight here is fine
- optimizer = Float16OptimizerWithFloat16Params(
- optimizer, config, None, muon_init_state_fn
- )
- else:
- optimizer = FP32Optimizer(optimizer, config, muon_init_state_fn)
-
- optimizers.append(optimizer)
-
- # expert optimizer exists meaning layerwise distributed optimizer is not used
- if len(expert_param_groups) > 0:
- expert_optimizer = TensorParallelMuon(expert_param_groups, **muon_kwargs)
- if config.bf16:
- expert_optimizer = Float16OptimizerWithFloat16Params(
- expert_optimizer, config, None, muon_init_state_fn
- )
- else:
- expert_optimizer = FP32Optimizer(expert_optimizer, config, muon_init_state_fn)
- setattr(expert_optimizer, 'grad_stats_parallel_group', pg_collection.tp_ep_pp)
- optimizers.append(expert_optimizer)
-
- # done with muon, unfreeze nonlinear and freeze linear
- for param in nonlinear_params:
- param.requires_grad = True
- for param in linear_params:
- param.requires_grad = False
-
- # call original get. linear params will be skipped since they're freezed
- chained_adam = get_megatron_optimizer(
- config,
- model_chunks,
- config_overrides=config_overrides,
- use_gloo_process_groups=use_gloo_process_groups,
- )
-
- # unfreeze everything
- for param in linear_params:
- param.requires_grad = True
+ from . import get_megatron_optimizer
- # chain everything together
- init_fns = [muon_init_state_fn] + len(chained_adam.chained_optimizers) * [
- nonlinear_init_state_fn
- ]
- optimizers += chained_adam.chained_optimizers
+ if kwargs.pop('layer_wise_distributed_optimizer', False):
+ config = args[0] if args else kwargs.get('config')
+ if config is not None:
+ config.use_layer_wise_distributed_optimizer = True
- if layer_wise_distributed_optimizer:
- log_single_rank(logger, logging.INFO, 'Using LayerWiseDistributedOptimizer for Muon')
- if reset_config_bf16:
- config.bf16 = True
- return LayerWiseDistributedOptimizer(
- optimizers,
- config,
- pg_collection,
- init_state_fn_list=init_fns,
- model_chunks=model_chunks,
- async_allgather=config.overlap_param_gather,
- )
- return ChainedOptimizer(optimizers)
+ return get_megatron_optimizer(*args, **kwargs)
diff --git a/megatron/core/optimizer/optimizer.py b/megatron/core/optimizer/optimizer.py
index df8ec8ef613..f5d66b8db4f 100644
--- a/megatron/core/optimizer/optimizer.py
+++ b/megatron/core/optimizer/optimizer.py
@@ -1161,20 +1161,26 @@ def _split_state_dict(self, state_dict):
state_dicts = [None] * len(self.chained_optimizers)
if state_dict is not None:
if len(self.model_chunks) == 1:
- state_dicts[0] = state_dict
+ # When there is only one global model chunk, all sub-optimizers
+ # (e.g., dense and MoE parts) use the same model state dict.
+ state_dicts = [state_dict] * len(self.chained_optimizers)
else:
- # Split state_dict if needed
+ # Split state_dict by model chunk object.
prefix = "model" if "model0" in state_dict.keys() else "model_"
- offset = 0
+ chunk_to_global_idx = {chunk: idx for idx, chunk in enumerate(self.model_chunks)}
for optimizer_idx, optimizer in enumerate(self.chained_optimizers):
if hasattr(optimizer, "model_chunks"):
d = {}
- for chunk_idx in range(len(optimizer.model_chunks)):
+ for chunk_idx, model_chunk in enumerate(optimizer.model_chunks):
+ assert model_chunk in chunk_to_global_idx, (
+ "Sub-optimizer model chunk was not found in "
+ "chained optimizer model chunks"
+ )
+ global_idx = chunk_to_global_idx[model_chunk]
assert (
- f"{prefix}{offset}" in state_dict
- ), f"Wrong state_dict format, cannot find '{prefix}{offset}'"
- d[f"{prefix}{chunk_idx}"] = state_dict[f"{prefix}{offset}"]
- offset += 1
+ f"{prefix}{global_idx}" in state_dict
+ ), f"Wrong state_dict format, cannot find '{prefix}{global_idx}'"
+ d[f"{prefix}{chunk_idx}"] = state_dict[f"{prefix}{global_idx}"]
if len(d) > 0:
state_dicts[optimizer_idx] = d
return state_dicts
diff --git a/megatron/core/optimizer/optimizer_config.py b/megatron/core/optimizer/optimizer_config.py
index 16b0a54cb6b..d425a56be71 100644
--- a/megatron/core/optimizer/optimizer_config.py
+++ b/megatron/core/optimizer/optimizer_config.py
@@ -142,7 +142,6 @@ class OptimizerConfig:
##############
# General
##############
-
lr: Optional[float] = None
"""Initial learning rate. Depending on decay style and initial warmup, the learning rate at each
iteration would be different.
@@ -207,7 +206,8 @@ class OptimizerConfig:
"""dtype of exp_avg_sq when enabling precision-aware-optimizer"""
optimizer: str = 'adam'
- """Optimizer name. NOTE: Deprecated, use individual optimizer classes instead."""
+ """Optimizer name (e.g., 'adam', 'sgd', 'muon'). Can be overridden per-parameter group
+ via config_overrides to use different optimizers for different parameters."""
###############
# Loss scaling
@@ -230,7 +230,7 @@ class OptimizerConfig:
"""Hysteresis for dynamic loss scaling."""
###################################################################################
- # Optimizer (NOTE: Deprecated, use individual optimizer classes instead.).
+ # Optimizer-specific parameters.
###################################################################################
# Adam.
adam_beta1: float = 0.9
@@ -255,15 +255,14 @@ class OptimizerConfig:
sgd_momentum: float = 0.9
"""Momentum factor for SGD optimizer."""
- # Muon.
- # TODO: move muon configs to it's own `MuonConfig`.
+ # emerging optimizers.
muon_momentum: float = 0.95
- """The momentum used by the internal SGD."""
+ """The momentum used by the internal SGD in Muon optimizer."""
muon_split_qkv: bool = True
"""Whether to split QKV parameters for Muon optimizer."""
- muon_use_nesterov: bool = False
+ muon_nesterov: bool = False
"""Whether to use Nesterov-style momentum in the internal SGD."""
muon_scale_mode: str = "spectral"
@@ -281,6 +280,24 @@ class OptimizerConfig:
muon_extra_scale_factor: float = 1.0
"""Additional scale factor for the muon update."""
+ soap_shampoo_beta: float = 0.95
+ """The beta parameter for the Shampoo preconditioner."""
+
+ soap_precondition_frequency: int = 1
+ """The frequency of the Shampoo preconditioner."""
+
+ soap_use_kl_shampoo: bool = True
+ """Whether to use the KL-Shampoo preconditioner."""
+
+ adaptive_muon_moment2_method: str = 'adamuon'
+ """The method to use for the moment2 update in Adaptive Muon optimizer."""
+
+ adaptive_muon_beta2: float = 0.95
+ """The beta2 parameter for the Adaptive Muon optimizer."""
+
+ adaptive_muon_eps: float = 1e-8
+ """The eps parameter for the Adaptive Muon optimizer."""
+
muon_scalar_optimizer: str = 'adam'
"""Optimizer for nonlinear parameters (embeddings, biases, norms) when using muon.
One of 'adam' or 'lion'. Defaults to 'adam'."""
@@ -299,6 +316,12 @@ class OptimizerConfig:
use_distributed_optimizer: bool = False
"""Distribute optimizer state over data-parallel replicas."""
+ use_layer_wise_distributed_optimizer: bool = False
+ """Use :class:`LayerWiseDistributedOptimizer` for emerging optimizers (e.g. Muon).
+ When set via ``--use-distributed-optimizer`` with an emerging optimizer, the training
+ arguments layer sets this flag and resets ``use_distributed_optimizer`` to False so
+ that the standard distributed-optimizer path is not triggered."""
+
overlap_param_gather: bool = False
"""If true, overlap param all-gather with forward compute.
This argument is intended to have the same value as the "overlap_param_gather" argument
@@ -337,6 +360,12 @@ class OptimizerConfig:
pin_cpu_params: bool = True
"""If True, pin the optimizer parameters to CPU memory."""
+ offload_optimizer_states: bool = False
+ """
+ If True, offload optimizer states to CPU after each optimizer step and
+ reload them before the next optimizer step.
+ """
+
################
# Miscellaneous
################
@@ -438,33 +467,6 @@ def __post_init__(self):
), "exp_avg_sq_dtype can only be fp32 when not using precision-aware optimizer"
-@dataclass
-class AdamOptimizerConfig(OptimizerConfig):
- """Adam optimizer configuration object."""
-
- optimizer: str = 'adam'
- """Optimizer name."""
-
- adam_beta1: float = 0.9
- """First coefficient for computing running averages of gradient and its square in Adam
- optimizer.
- """
-
- adam_beta2: float = 0.999
- """Second coefficient for computing running averages of gradient and its square in Adam
- optimizer.
- """
-
- adam_eps: float = 1e-08
- """Term added to the denominator to improve numerical stability in Adam optimizer."""
-
-
-@dataclass
-class SGDOptimizerConfig(OptimizerConfig):
- """SGD optimizer configuration object."""
-
- optimizer: str = 'sgd'
- """Optimizer name."""
-
- sgd_momentum: float = 0.9
- """Momentum factor for SGD optimizer."""
+# Backward-compatible aliases (deprecated; use OptimizerConfig directly).
+AdamOptimizerConfig = OptimizerConfig
+SGDOptimizerConfig = OptimizerConfig
diff --git a/megatron/core/optimizer_param_scheduler.py b/megatron/core/optimizer_param_scheduler.py
index e01a708ce79..91ed362b1b2 100644
--- a/megatron/core/optimizer_param_scheduler.py
+++ b/megatron/core/optimizer_param_scheduler.py
@@ -14,7 +14,7 @@
logger = logging.getLogger(__name__)
-class ParamGroupOverride(TypedDict):
+class ParamGroupOverride(TypedDict, total=False):
"""Override values for a parameter group. These values may be optimizer-state/scheduler related.
These are the values you see later in param_group.get(...) calls in the
@@ -23,7 +23,7 @@ class ParamGroupOverride(TypedDict):
Example:
>>> param_group_override = ParamGroupOverride(min_lr=1e-4, wd_mult=0.1)
- >>> param_group_override == ParamGroupOverride(newvar=3) # this is ok too
+ >>> param_group_override == ParamGroupOverride(optimizer='muon') # per-param optimizer
"""
@@ -32,6 +32,7 @@ class ParamGroupOverride(TypedDict):
start_wd: float
end_wd: float
wd_mult: float
+ optimizer: str
def get_canonical_lr_for_logging(param_groups: list[dict]) -> float | None:
diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py
index 1435e207b18..24e873c25e2 100644
--- a/megatron/core/parallel_state.py
+++ b/megatron/core/parallel_state.py
@@ -115,8 +115,8 @@
_CONTEXT_PARALLEL_GLOBAL_RANKS = None
# Hierarchical context parallel groups
_HIERARCHICAL_CONTEXT_PARALLEL_GROUPS = None
-# Hybrid context parallel groups
-_HYBRID_DP_CP_GROUPS = {}
+# Dynamic context parallel groups
+_DYNAMIC_DP_CP_GROUPS = {}
# Data parallel group information with context parallel combined.
_DATA_PARALLEL_GROUP_WITH_CP = None
@@ -419,29 +419,29 @@ def create_hierarchical_groups(
return hierarchical_groups, hierarchical_groups_gloo
-def create_hybrid_dp_cp_groups(rank, ranks, pg_options):
+def create_dynamic_dp_cp_groups(rank, ranks, pg_options):
"""
- Creates groups required for hybrid DPxCP.
+ Creates groups required for dynamic DPxCP.
Creates a new group for every power of 2 up to the number of DPxCP ranks.
Returns a dictionary indexed by group size.
"""
- hybrid_dp_cp_groups = {}
+ dynamic_dp_cp_groups = {}
# Generate group for every power of 2 up to the number of CP ranks
# We limit the allowed group sizes in order to avoid excessive overhead.
- group_sizes = [2**i for i in range(int(log2(len(ranks))))][1:]
+ group_sizes = [2**i for i in range(int(log2(len(ranks))))]
for group_size in group_sizes:
for i in range(0, len(ranks), group_size):
group = create_group(
ranks[i : i + group_size],
pg_options=pg_options,
- group_desc=f"HYBRID_DP_CP_GROUP_{group_size}",
+ group_desc=f"DYNAMIC_DP_CP_GROUP_{group_size}",
)
if rank in ranks[i : i + group_size]:
assert (
- group_size not in hybrid_dp_cp_groups
- ), f"Rank {rank} appears in multiple Hybrid DP CP groups of size {group_size}"
- hybrid_dp_cp_groups[group_size] = group
- return hybrid_dp_cp_groups
+ group_size not in dynamic_dp_cp_groups
+ ), f"Rank {rank} appears in multiple Dynamic DP CP groups of size {group_size}"
+ dynamic_dp_cp_groups[group_size] = group
+ return dynamic_dp_cp_groups
class RankGenerator(object):
@@ -553,7 +553,7 @@ def initialize_model_parallel(
use_sharp: bool = False,
context_parallel_size: int = 1,
hierarchical_context_parallel_sizes: Optional[List[int]] = None,
- hybrid_context_parallel: bool = False,
+ dynamic_context_parallel: bool = False,
expert_model_parallel_size: int = 1,
num_distributed_optimizer_instances: int = 1,
expert_tensor_parallel_size: Optional[int] = None,
@@ -939,18 +939,29 @@ def initialize_model_parallel(
if "NCCL_COLLNET_ENABLE" in os.environ:
del os.environ["NCCL_COLLNET_ENABLE"]
- if hybrid_context_parallel:
- global _HYBRID_DP_CP_GROUPS
+ if dynamic_context_parallel:
+ # TODO: Are gloo groups needed for Dynamic CP?
+ global _DYNAMIC_DP_CP_GROUPS
for ranks_with_cp in decoder_rank_generator.get_ranks('dp-cp'):
assert (
len(ranks_with_cp) % 2 == 0
- ), "Hybrid context parallel requires an even number of ranks"
- _HYBRID_DP_CP_GROUPS.update(
- create_hybrid_dp_cp_groups(
+ ), "Dynamic context parallel requires an even number of ranks"
+ _DYNAMIC_DP_CP_GROUPS.update(
+ create_dynamic_dp_cp_groups(
rank, ranks_with_cp, get_nccl_options("dp_cp", nccl_comm_cfgs)
)
)
- # TODO: Are gloo groups needed for hybrid cp?
+
+ # PyTorch is performing lazy initialization of the communicator group.
+ # Therefore, we need to perform a nccl call to ensure that the communicator group is created.
+ data_parallel_size_with_cp = data_parallel_size * context_parallel_size
+ group_sizes = [2**i for i in range(0, int(log2(data_parallel_size_with_cp)))]
+ if group_sizes[-1] * 2 == data_parallel_size_with_cp:
+ group_sizes.append(data_parallel_size_with_cp)
+ for group_size in group_sizes:
+ group = get_dynamic_data_context_parallel_groups(group_size=group_size)
+ torch.distributed.barrier(group=group, device_ids=[torch.cuda.current_device()])
+ torch.cuda.synchronize()
for ranks in decoder_rank_generator.get_ranks('dp'):
group = create_group(
@@ -1474,16 +1485,16 @@ def get_hierarchical_context_parallel_groups(check_initialized=True):
return _HIERARCHICAL_CONTEXT_PARALLEL_GROUPS
-def get_hybrid_data_context_parallel_groups(check_initialized=True, group_size=None):
- """Get the hybrid context parallel groups the caller rank belongs to."""
+def get_dynamic_data_context_parallel_groups(check_initialized=True, group_size=None):
+ """Get the dynamic context parallel groups the caller rank belongs to."""
# If the group size is the same as the entire DPxCP group, return the original group
if get_data_parallel_world_size(with_context_parallel=True) == group_size:
if check_initialized:
assert _DATA_PARALLEL_GROUP_WITH_CP is not None
return _DATA_PARALLEL_GROUP_WITH_CP
if check_initialized:
- assert _HYBRID_DP_CP_GROUPS is not None
- return _HYBRID_DP_CP_GROUPS[group_size]
+ assert _DYNAMIC_DP_CP_GROUPS is not None
+ return _DYNAMIC_DP_CP_GROUPS[group_size]
def get_embedding_group(check_initialized=True):
diff --git a/megatron/core/pipeline_parallel/hybrid_cp_schedule.py b/megatron/core/pipeline_parallel/dynamic_cp_schedule.py
similarity index 99%
rename from megatron/core/pipeline_parallel/hybrid_cp_schedule.py
rename to megatron/core/pipeline_parallel/dynamic_cp_schedule.py
index 27b5fc87945..48dd633aeba 100644
--- a/megatron/core/pipeline_parallel/hybrid_cp_schedule.py
+++ b/megatron/core/pipeline_parallel/dynamic_cp_schedule.py
@@ -48,7 +48,7 @@ def gpus_needed(self, seq_len: int) -> int:
This is used to determine the CP size of a sub-sample.
The number is rounded up to the next power of 2 to match the available
- hybrid context parallel process group sizes.
+ dynamic context parallel process group sizes.
"""
return max(1, 2 ** ceil(log2((seq_len / self.max_seq_len_per_rank))))
@@ -370,7 +370,7 @@ def fill_empty_gpus(
"try to increase 'max-seqlen-per-cp-rank'."
min_group_size = min(existing_group_sizes)
- # We have Hybrid DPxCP groups for every power of 2 of GPUs or the entire DPxCP group.
+ # We have Dynamic DPxCP groups for every power of 2 of GPUs or the entire DPxCP group.
next_power = min(min_group_size * 2, total_gpus)
# Find the first group of min_group_size that can be expanded
@@ -474,7 +474,7 @@ def get_groups_and_subsamples(self, sample_id_seqlens, config):
return groups, sample_id_groups
-def hybrid_context_parallel_forward_backward(
+def dynamic_context_parallel_forward_backward(
forward_step_func,
data_iterator,
model,
@@ -492,7 +492,7 @@ def hybrid_context_parallel_forward_backward(
model_type,
):
"""
- Scheduler for Hybrid Context Parallel.
+ Scheduler for Dynamic Context Parallel.
This function performs the packed sample scheduling and determines
1. The number of microbatches to schedule for each CP rank
diff --git a/megatron/core/pipeline_parallel/fine_grained_activation_offload.py b/megatron/core/pipeline_parallel/fine_grained_activation_offload.py
index 08e46a039e2..99e3e3e4a2a 100644
--- a/megatron/core/pipeline_parallel/fine_grained_activation_offload.py
+++ b/megatron/core/pipeline_parallel/fine_grained_activation_offload.py
@@ -5,6 +5,7 @@
from typing import Any, Dict, Tuple
import torch
+from torch.autograd.graph import saved_tensors_hooks
# CPU offload implementation for pipeline parallelism
DEBUG = False
@@ -94,9 +95,9 @@ def print_offload_summary_table(total_offload_bytes: Dict[str, int]):
torch.distributed.barrier()
-class GPUTensorPool:
+class OffloadTensorPool:
"""
- GPU memory pool for efficient allocation and deallocation of tensors.
+ Memory pool for efficient allocation and deallocation of tensors.
Features:
- Supports multiple tensor shapes and dtypes, each with its own pool
@@ -105,7 +106,7 @@ class GPUTensorPool:
- Uses queue-based management for O(1) allocation and deallocation
Example:
- pool = GPUTensorPool(device='cuda:0')
+ pool = OffloadTensorPool(device='cuda:0')
tensor = pool.allocate((128, 512), dtype=torch.float32)
# ... use tensor ...
pool.free(tensor, (128, 512), dtype=torch.float32)
@@ -113,10 +114,10 @@ class GPUTensorPool:
def __init__(self, device: str = 'cuda', pin_memory: bool = False):
"""
- Initialize GPU tensor pool.
+ Initialize offload tensor pool.
Args:
- device: GPU device, default 'cuda'
+ device: Device, default 'cuda'
pin_memory: Whether to use pinned memory (mainly for CPU tensors)
"""
self.device = torch.device(device)
@@ -136,7 +137,7 @@ def __init__(self, device: str = 'cuda', pin_memory: bool = False):
'pool_misses': 0, # Number of times a new tensor was created
}
- debug_rank("GPUTensorPool: Initialized with dynamic allocation")
+ debug_rank("OffloadTensorPool: Initialized with dynamic allocation")
def _get_pool_key(self, shape: Tuple, dtype: torch.dtype) -> Tuple:
"""Generate a unique key for the pool based on shape and dtype."""
@@ -181,7 +182,7 @@ def allocate(self, shape: Tuple, dtype: torch.dtype = torch.float32) -> torch.Te
tensor = pool['free'].popleft()
self._stats['pool_hits'] += 1
debug_rank(
- f"GPUTensorPool.allocate: Reused tensor from pool, "
+ f"OffloadTensorPool.allocate: Reused tensor from pool, "
f"shape={shape}, dtype={dtype}, "
f"remaining in pool={len(pool['free'])}"
)
@@ -194,7 +195,7 @@ def allocate(self, shape: Tuple, dtype: torch.dtype = torch.float32) -> torch.Te
memory_mb = self._calculate_memory_size(shape, dtype) / (1024**2)
debug_rank(
- f"GPUTensorPool.allocate: Created new tensor, "
+ f"OffloadTensorPool.allocate: Created new tensor, "
f"shape={shape}, dtype={dtype}, "
f"memory={memory_mb:.2f} MB, "
f"total_created={len(pool['all'])}"
@@ -244,7 +245,7 @@ def free(self, tensor: torch.Tensor):
self._stats['current_in_use'] -= 1
debug_rank(
- f"GPUTensorPool.free: shape={shape}, dtype={dtype}, "
+ f"OffloadTensorPool.free: shape={shape}, dtype={dtype}, "
f"available in pool={len(pool['free'])}"
)
@@ -293,7 +294,7 @@ def get_pool_status(self, shape: Tuple = None, dtype: torch.dtype = None) -> Dic
def reset(self):
"""Reset the pool, marking all tensors as available."""
- debug_rank("GPUTensorPool: Resetting pool...")
+ debug_rank("OffloadTensorPool: Resetting pool...")
for pool_key, pool in self._pools.items():
# Clear and refill the free queue
@@ -303,11 +304,11 @@ def reset(self):
pool['allocated_count'] = 0
self._stats['current_in_use'] = 0
- debug_rank("GPUTensorPool: Reset complete")
+ debug_rank("OffloadTensorPool: Reset complete")
def clear(self):
"""Clear the pool and release all GPU memory."""
- debug_rank("GPUTensorPool: Clearing pool...")
+ debug_rank("OffloadTensorPool: Clearing pool...")
for pool_key, pool in self._pools.items():
# Clear all references, allowing PyTorch GC to reclaim memory
@@ -321,7 +322,7 @@ def clear(self):
if torch.cuda.is_available():
torch.cuda.empty_cache()
- debug_rank("GPUTensorPool: Clear complete")
+ debug_rank("OffloadTensorPool: Clear complete")
def __del__(self):
"""Destructor to ensure resources are released."""
@@ -410,11 +411,16 @@ def __init__(self):
# allocate streams and events for synchronization
self._d2h_stream = torch.cuda.Stream()
self._h2d_stream = torch.cuda.Stream()
+ # CUDA graph stream and event for offloading modules in cuda graph
+ self._cuda_graph_stream = torch.cuda.Stream()
+ self._cuda_graph_event = torch.cuda.Event(external=True)
# Shared CPU tensor pool for all chunks to improve reuse efficiency
- self._cpu_tensor_pool = GPUTensorPool(device="cpu", pin_memory=True)
+ self._cpu_tensor_pool = OffloadTensorPool(device="cpu", pin_memory=True)
# Whether the manager is in warmup phase.
self._is_warmup = True
+ # Whether the manager is in CUDA graph replay phase.
+ self._in_replay = False
# Cache OffloadChunkHandler objects for each virtual pipeline stage and each forward pass.
self._cached_chunks_forward = []
# Cache OffloadChunkHandler objects for each virtual pipeline stage and each backward pass.
@@ -433,6 +439,10 @@ def __init__(self):
self._delayed_offload_groups = []
self.reset()
+ self._saved_tensors_hooks = saved_tensors_hooks(
+ self.on_save_for_backward, self.on_get_saved_tensor
+ )
+
@property
def d2h_stream(self):
"""Get the device-to-host (GPU to CPU) transfer stream."""
@@ -443,22 +453,32 @@ def h2d_stream(self):
"""Get the host-to-device (CPU to GPU) transfer stream."""
return self._h2d_stream
+ @property
+ def cuda_graph_stream(self):
+ """Get the CUDA graph stream."""
+ return self._cuda_graph_stream
+
+ @property
+ def cuda_graph_event(self):
+ """Get the CUDA graph event."""
+ return self._cuda_graph_event
+
@property
def cpu_tensor_pool(self):
"""Get the shared CPU tensor pool."""
return self._cpu_tensor_pool
- def push_offload_groups(self, group_hook, forced_released_tensors):
+ def push_offload_groups(self, group_hook, name, forced_released_tensors):
"""Push the offload groups to the delayed queue."""
debug_rank(f"pushing offload groups to the delayed queue")
- self._delayed_offload_groups.append((group_hook, forced_released_tensors))
+ self._delayed_offload_groups.append((group_hook, name, forced_released_tensors))
def flush_delayed_groups(self):
"""Flush the delayed groups."""
debug_rank("flushing delayed groups")
- # Flush the delayed groups in reverse order to maintain the order of the groups.
- for group_hook, forced_released_tensors in reversed(self._delayed_offload_groups):
- group_hook(forced_released_tensors)
+ # Flush the delayed groups in forward order.
+ for group_hook, name, forced_released_tensors in self._delayed_offload_groups:
+ group_hook(name, forced_released_tensors)
self._delayed_offload_groups = []
def reset(self):
@@ -549,13 +569,41 @@ def post_warmup_callback(self):
debug_rank(f"setting offload to false for group {name} at chunk index {chunk_idx}")
else:
break
- debug_rank(f"offload margin {self._offload_margin}")
assert self._offload_margin == 0, "Offload margin is not 0"
+ # Disable the groups to meet the delta offload bytes across PP ranks.
+ keep_on_gpu_bytes = self._pp_rank * self._delta_offload_bytes_across_pp_ranks
+ for chunk in self._cached_chunks_backward:
+ for group in chunk.offload_groups:
+ if group.offload and keep_on_gpu_bytes > 0:
+ debug_rank(
+ f"group {group._name} offload {group.offload} \
+ keep_on_gpu_bytes {keep_on_gpu_bytes}"
+ )
+ keep_on_gpu_bytes -= group.total_offload_bytes
+ group.offload = False
+ # Disable the groups to meet the activation offload fraction.
+ for chunk in self._cached_chunks_backward:
+ offloaded_groups_count = 0
+ for group in chunk.offload_groups:
+ if group.offload:
+ offloaded_groups_count += 1
+ disabled_groups_count = int(
+ offloaded_groups_count * (1 - self._activation_offload_fraction)
+ )
+ debug_rank(f"Disabled {disabled_groups_count}/{offloaded_groups_count} groups")
+ for group in reversed(chunk.offload_groups):
+ if group.offload:
+ if disabled_groups_count > 0:
+ disabled_groups_count -= 1
+ group.offload = False
+ else:
+ break
# Dump the offload information
total_tensor_count = {}
total_offload_bytes = {}
for chunk in self._cached_chunks_forward:
for group in chunk.offload_groups:
+ debug_rank(f"chunk {chunk} group {group} offload {group.offload}")
if group.offload:
if group._name not in total_tensor_count:
total_tensor_count[group._name] = 0
@@ -567,6 +615,8 @@ def post_warmup_callback(self):
# where the memory cost will not increase anymore.
if chunk is self._cached_chunks_backward[0]:
break
+ debug_rank(f"total_tensor_count {total_tensor_count}")
+ debug_rank(f"total_offload_bytes {total_offload_bytes}")
# Cache summary for downstream consumers (e.g., unit tests).
self._offload_summary_bytes = dict(total_offload_bytes)
self._offload_summary_total_bytes = int(sum(total_offload_bytes.values()))
@@ -607,15 +657,25 @@ def front_backward_chunk(self, name=None):
return None
def init_model_chunk_offload_handler(
- self, vp_size, vp_stage, min_offloaded_tensor_size=1024 * 1024
+ self,
+ pp_rank,
+ vp_size,
+ vp_stage,
+ min_offloaded_tensor_size=1024 * 1024,
+ delta_offload_bytes_across_pp_ranks=0,
+ activation_offload_fraction: float = 1.0,
):
"""
Initialize a chunk offload handler for a model chunk (microbatch).
Args:
+ pp_rank: Pipeline parallel rank
vp_size: Virtual pipeline size
vp_stage: Virtual pipeline stage index (None means stage 0)
min_offloaded_tensor_size: Minimum tensor size (in elements) to offload
+ delta_offload_bytes_across_pp_ranks:
+ Difference of offload bytes across PP ranks to balance the offload load.
+ activation_offload_fraction: Fraction of eligible groups to offload, in range [0, 1].
"""
if not self._is_warmup:
return
@@ -625,6 +685,10 @@ def init_model_chunk_offload_handler(
self._vpp = vp_size
self._stages = [[] for _ in range(vp_size)]
+ self._delta_offload_bytes_across_pp_ranks = delta_offload_bytes_across_pp_ranks
+ self._pp_rank = pp_rank
+ self._activation_offload_fraction = activation_offload_fraction
+
if vp_stage is None:
cur_vpp_rank = 0
else:
@@ -670,10 +734,10 @@ def cur_backward_chunk(self):
"""Get the current backward pass chunk handler."""
return self._cur_backward_chunk
- def mark_not_offloadable(self, tensor: torch.Tensor):
+ def mark_not_offload(self, tensor: torch.Tensor):
"""Mark the current forward chunk as not offloadable."""
if tensor is not None:
- tensor.offloading_activation = False
+ tensor._do_not_offload = True
def __enter__(self):
"""Enter context manager to enable activation offloading hooks."""
@@ -687,10 +751,7 @@ def __enter__(self):
else:
raise RuntimeError("TE CPU offload is not available")
self.inside_context = True
-
- torch._C._autograd._push_saved_tensors_default_hooks(
- self.on_save_for_backward, self.on_get_saved_tensor
- )
+ self._saved_tensors_hooks.__enter__()
def __exit__(self, *args: Any):
"""Exit context manager and restore original tensor saving behavior."""
@@ -704,7 +765,7 @@ def __exit__(self, *args: Any):
else:
raise RuntimeError("TE CPU offload is not available")
self.inside_context = False
- torch._C._autograd._pop_saved_tensors_default_hooks()
+ self._saved_tensors_hooks.__exit__()
def on_save_for_backward(self, tensor: torch.Tensor) -> Any:
"""
@@ -794,17 +855,17 @@ def reset(self):
self._tensor_count_current_group = 0
self._reloading_group = []
- def find_group_with_name(self, name: str, start_index: int = 0):
+ def find_group_with_name(
+ self, groups: list[OffloadTensorGroup], name: str, start_index: int = 0
+ ):
"""Find the group with the given name starting from the given index."""
- return next(
- (group for group in self.offload_groups[start_index:] if group._name == name), None
- )
+ return next((group for group in groups[start_index:] if group._name == name), None)
def is_empty_chunk(self, name=None):
"""Check if this chunk has no tensors to manage."""
debug_rank(f"------is_empty_chunk {self._max_group_size}")
if name is not None:
- return self.find_group_with_name(name) is None
+ return self.find_group_with_name(self.offload_groups, name) is None
return self._max_group_size == 0
def finish_all_groups(self, name=None) -> bool:
@@ -821,12 +882,15 @@ def finish_all_groups(self, name=None) -> bool:
):
return True
assert name is not None, "Name is required"
- return self.find_group_with_name(name, self._offloaded_group_index) is None
+ return (
+ self.find_group_with_name(self.offload_groups, name, self._offloaded_group_index)
+ is None
+ )
def find_next_group(self, name=None):
"""Find the next group with the given name."""
assert name is not None, "Name is required"
- return self.find_group_with_name(name, self._offloaded_group_index)
+ return self.find_group_with_name(self.offload_groups, name, self._offloaded_group_index)
def tensor_push(self, tensor):
"""Push tensor to the offload handler."""
@@ -859,20 +923,19 @@ def tensor_pop(self, tensor_tag):
def tensor_need_offloading_checker(self, tensor):
"""Check if the tensor needs to be offloaded."""
- debug_rank(
- f"tensor_need_offloading_checker {getattr(tensor, 'offloading_activation', None)}"
- )
+ debug_rank("tensor_need_offloading_checker")
if tensor.numel() < self.min_offloaded_tensor_size:
return False
# Respect tensor's offload preference if specified
- if hasattr(tensor, "offloading_activation") and not tensor.offloading_activation:
+ if getattr(tensor, "_TE_do_not_offload", False) or getattr(
+ tensor, "_do_not_offload", False
+ ):
return False
return True
- def bulk_offload_group(self):
+ def bulk_offload_group(self, group_to_offload):
"""offload a group of tensors recorded in tensor_push()."""
debug_rank("------bulk_offload_group")
- group_to_offload = self._groups_to_offload[-1]
torch.cuda.nvtx.range_push("activation offloading " + group_to_offload._name)
with torch.cuda.stream(self.d2h_stream):
for tensor_tag, tensor_on_device in group_to_offload._tensors.items():
@@ -885,7 +948,6 @@ def bulk_offload_group(self):
tensor_on_device.record_stream(self.d2h_stream)
group_to_offload.push_tensor(tensor_tag, state)
group_to_offload.record_offload_event(self.d2h_stream)
- self._groups_to_offload.pop()
torch.cuda.nvtx.range_pop()
def get_max_deduplicated_groups(self):
@@ -925,10 +987,11 @@ def pre_reload_last_layer(self):
# Reload the last group (last layer) early
self.bulk_reload_group()
- def should_bulk_offload(self):
+ def should_bulk_offload(self, name):
"""Determine if the current group should be offloaded."""
assert len(self._groups_to_offload) > 0, "No groups to offload"
- group = self._groups_to_offload[-1]
+ group = self.find_group_with_name(self._groups_to_offload, name)
+ assert group is not None, f"Group {name} not found in {self._groups_to_offload}"
debug_rank(f"should_bulk_offload {self.is_warmup} {group.offload}")
# Don't offload if the chunk is not in warmup stage
if self.is_warmup:
@@ -949,12 +1012,17 @@ def should_bulk_offload(self):
return True
- def bulk_offload(self, forced_released_tensors):
+ def bulk_offload(self, name, forced_released_tensors):
"""Offload a group of tensors and optionally release their GPU memory."""
debug_rank("----bulk_offload")
- if self.should_bulk_offload():
- self._groups_to_reload.append(self._groups_to_offload[-1])
- self.bulk_offload_group()
+ if self.should_bulk_offload(name):
+ group_to_offload = self.find_group_with_name(self._groups_to_offload, name)
+ assert (
+ group_to_offload is not None
+ ), f"Group {name} not found in {self._groups_to_offload}"
+ self._groups_to_reload.append(group_to_offload)
+ self.bulk_offload_group(group_to_offload)
+ self._groups_to_offload.remove(group_to_offload)
# Manually release tensors not auto-freed by torch GC
if len(forced_released_tensors) > 0:
cur_stream = torch.cuda.current_stream()
@@ -964,14 +1032,14 @@ def bulk_offload(self, forced_released_tensors):
release_tensor.record_stream(cur_stream)
release_tensor.untyped_storage().resize_(0)
- def on_group_commit_forward(self, forced_released_tensors):
+ def on_group_commit_forward(self, name, forced_released_tensors):
"""Called at the end of a layer group's forward pass to trigger offloading."""
if not self.do_offload:
return
- debug_rank("--on_group_commit_forward")
+ debug_rank(f"--on_group_commit_forward {name}")
# Wait for compute to finish before starting offload
self.d2h_stream.wait_stream(torch.cuda.current_stream())
- self.bulk_offload(forced_released_tensors)
+ self.bulk_offload(name, forced_released_tensors)
def bulk_reload(self):
"""Reload the next group of tensors from CPU to GPU."""
@@ -1070,12 +1138,12 @@ def forward(ctx, tensor, cur_forward_chunk, name, forced_released_tensors, delay
# pylint: disable=missing-function-docstring
debug_rank("FineGrainedOffloadingGroupCommitFunction forward")
- if delay_offload:
+ if delay_offload and PipelineOffloadManager.get_instance()._in_replay:
PipelineOffloadManager.get_instance().push_offload_groups(
- cur_forward_chunk.on_group_commit_forward, forced_released_tensors
+ cur_forward_chunk.on_group_commit_forward, name, forced_released_tensors
)
else:
- cur_forward_chunk.on_group_commit_forward(forced_released_tensors)
+ cur_forward_chunk.on_group_commit_forward(name, forced_released_tensors)
ctx.cpu_offload_handler = cur_forward_chunk
ctx.name = name
return tensor
@@ -1172,13 +1240,6 @@ def fine_grained_offloading_group_start(tensor, name=None):
return FineGrainedOffloadingGroupStartFunction.apply(tensor, cur_forward_chunk, name)
-def fine_grained_offloading_forward_record(event: torch.cuda.Event) -> None:
- """Record the forward event for cuda graph capture."""
- d2h_stream = PipelineOffloadManager.get_instance().d2h_stream
- torch.cuda.current_stream().record_event(event)
- torch.cuda.current_stream().wait_stream(d2h_stream)
-
-
class FineGrainedOffloadingBackwardRecordFunction(torch.autograd.Function):
"""
Identity operation that marks the end of a layer group for offload synchronization.
@@ -1186,23 +1247,19 @@ class FineGrainedOffloadingBackwardRecordFunction(torch.autograd.Function):
"""
@staticmethod
- def forward(ctx, tensor, event: torch.cuda.Event) -> torch.Tensor:
+ def forward(ctx, tensor) -> torch.Tensor:
"""Forward pass for cuda graph capture."""
- ctx.event = event
+ debug_rank("FineGrainedOffloadingBackwardRecordFunction forward")
return tensor
@staticmethod
def backward(ctx, grad_output):
"""Record the backward event and wait for the h2d stream on cuda graph stream."""
- h2d_stream = PipelineOffloadManager.get_instance().h2d_stream
- torch.cuda.current_stream().record_event(ctx.event)
- torch.cuda.current_stream().wait_stream(h2d_stream)
- return grad_output, None
-
-
-def fine_grained_offloading_backward_record(tensor, event: torch.cuda.Event) -> torch.Tensor:
- """Record the backward event for cuda graph capture."""
- return FineGrainedOffloadingBackwardRecordFunction.apply(tensor, event)
+ debug_rank("FineGrainedOffloadingBackwardRecordFunction backward")
+ mgr = PipelineOffloadManager.get_instance()
+ torch.cuda.current_stream().record_event(mgr.cuda_graph_event)
+ torch.cuda.current_stream().wait_stream(mgr.h2d_stream)
+ return (grad_output,)
class FineGrainedActivationOffloadingInterface:
@@ -1226,10 +1283,32 @@ def __exit__(self, *args: Any):
PipelineOffloadManager.get_instance().__exit__()
@staticmethod
- def init_chunk_handler(vp_size, vp_stage, min_offloaded_tensor_size):
+ def cuda_graph_stream():
+ """Get the CUDA graph stream."""
+ return PipelineOffloadManager.get_instance().cuda_graph_stream
+
+ @staticmethod
+ def cuda_graph_event():
+ """Get the CUDA graph event."""
+ return PipelineOffloadManager.get_instance().cuda_graph_event
+
+ @staticmethod
+ def init_chunk_handler(
+ pp_rank,
+ vp_size,
+ vp_stage,
+ min_offloaded_tensor_size,
+ delta_offload_bytes_across_pp_ranks,
+ activation_offload_fraction,
+ ):
"""Initialize the chunk handler, called at the start of a microbatch forward pass."""
PipelineOffloadManager.get_instance().init_model_chunk_offload_handler(
- vp_size, vp_stage, min_offloaded_tensor_size
+ pp_rank,
+ vp_size,
+ vp_stage,
+ min_offloaded_tensor_size,
+ delta_offload_bytes_across_pp_ranks,
+ activation_offload_fraction,
)
@staticmethod
@@ -1237,24 +1316,30 @@ def get_context(flag):
"""Get the fine-grained offload context"""
return PipelineOffloadManager.get_instance() if flag else nullcontext()
- @staticmethod
- def group_commit(tensor, name, forced_released_tensors=None, delay_offload=False):
- """Group commit the tensors."""
- return fine_grained_offloading_group_commit(
- tensor, name, forced_released_tensors, delay_offload
- )
+ def group_offload(self, tensor, forced_released_tensors=None, delay_offload=False):
+ """Group offload the tensors."""
+ if self.offload:
+ return fine_grained_offloading_group_commit(
+ tensor, self.name, forced_released_tensors, delay_offload
+ )
+ return tensor
@staticmethod
- def mark_not_offloadable(tensor: torch.Tensor):
+ def mark_not_offload(tensor: torch.Tensor):
"""Mark the tensor as not offloadable."""
- PipelineOffloadManager.get_instance().mark_not_offloadable(tensor)
+ PipelineOffloadManager.get_instance().mark_not_offload(tensor)
@staticmethod
- def forward_record(event: torch.cuda.Event) -> None:
+ def forward_record() -> None:
"""Record the forward event for cuda graph capture."""
- d2h_stream = PipelineOffloadManager.get_instance().d2h_stream
- torch.cuda.current_stream().record_event(event)
- torch.cuda.current_stream().wait_stream(d2h_stream)
+ mgr = PipelineOffloadManager.get_instance()
+ torch.cuda.current_stream().record_event(mgr.cuda_graph_event)
+ torch.cuda.current_stream().wait_stream(mgr.d2h_stream)
+
+ @staticmethod
+ def backward_record(tensor) -> torch.Tensor:
+ """Record the backward event for cuda graph capture."""
+ return FineGrainedOffloadingBackwardRecordFunction.apply(tensor)
@staticmethod
def reset():
@@ -1265,3 +1350,28 @@ def reset():
def reset_instance():
"""Reset the singleton instance."""
PipelineOffloadManager.reset_instance()
+
+ @staticmethod
+ def flush_delayed_groups():
+ """Flush the delayed groups."""
+ PipelineOffloadManager.get_instance().flush_delayed_groups()
+
+ @staticmethod
+ def disable_offload():
+ """Disable the offload."""
+ PipelineOffloadManager.get_instance().disable_offload()
+
+ @staticmethod
+ def enable_offload():
+ """Enable the offload."""
+ PipelineOffloadManager.get_instance().enable_offload()
+
+ @staticmethod
+ def enter_replay():
+ """Enter CUDA graph replay mode to enable delayed offloading."""
+ PipelineOffloadManager.get_instance()._in_replay = True
+
+ @staticmethod
+ def exit_replay():
+ """Exit CUDA graph replay mode."""
+ PipelineOffloadManager.get_instance()._in_replay = False
diff --git a/megatron/core/pipeline_parallel/schedules.py b/megatron/core/pipeline_parallel/schedules.py
index 24ec25e5150..5c442e807df 100644
--- a/megatron/core/pipeline_parallel/schedules.py
+++ b/megatron/core/pipeline_parallel/schedules.py
@@ -1,7 +1,8 @@
-# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
import contextlib
from functools import partial
+from itertools import zip_longest
from typing import Callable, Dict, Iterator, List, Optional, Union
import torch
@@ -39,7 +40,7 @@
combined_1f1b_schedule_for_interleaved_pipelining,
combined_1f1b_schedule_for_no_pipelining,
)
-from .hybrid_cp_schedule import hybrid_context_parallel_forward_backward
+from .dynamic_cp_schedule import dynamic_context_parallel_forward_backward
# Types
Shape = Union[List[int], torch.Size]
@@ -293,6 +294,8 @@ def forward_step_calc_loss(
if config.calculate_per_token_loss:
MoEAuxLossAutoScaler.set_loss_scale(loss_scale)
else:
+ # See https://github.com/NVIDIA/Megatron-LM/pull/2217 for detailed explanation
+ # of scaling by cp_group_size
cp_size_for_scaling = cp_group_size if cp_group_size is not None else 1
MoEAuxLossAutoScaler.set_loss_scale(loss_scale * cp_size_for_scaling / num_microbatches)
@@ -662,8 +665,8 @@ def forward_backward_no_pipelining(
total_num_tokens,
partial(check_first_val_step, first_val_step, forward_only),
)
- elif config.hybrid_context_parallel:
- forward_data_store, total_num_tokens = hybrid_context_parallel_forward_backward(
+ elif config.dynamic_context_parallel:
+ forward_data_store, total_num_tokens = dynamic_context_parallel_forward_backward(
forward_step_func,
data_iterator,
model,
@@ -873,6 +876,136 @@ def get_schedule_table(num_microbatches, num_model_chunks, microbatch_group_size
return schedule_table
+def convert_schedule_table_to_order(num_warmup_microbatches, num_model_chunks, schedule_table):
+ """Convert a tunable schedule lookup table to the te.make_graphed_callables() accepted
+ order format. For example, the tunable schedule table for PP2 N3M5 with VP2 is as below:
+ virtual_microbatch_id | 0 1 2 3 4 5 6 7 8 9
+ microbatch_id | 0 1 2 0 1 2 3 4 3 4
+ model_chunk_id | 0 0 0 1 1 1 0 0 1 1
+
+ Then the forward backward separated order is:
+ forward | 1 1 1 2 2 2 1 1 2 2
+ backward | -2 -2 -2 -1 -1 -1 -2 -2 -1 -1
+
+ If num_warmup_microbatches is 5, the output order is:
+ 1 1 1 2 2 2 -2 1 -2 1 -2 2 -1 2 -1 -1 -2 -2 -1 -1
+ """
+ _, model_chunk_id_table = zip(*schedule_table)
+ forward_order = [chunk_id + 1 for chunk_id in model_chunk_id_table]
+ backward_order = [chunk_id - num_model_chunks for chunk_id in model_chunk_id_table]
+ order = forward_order[:num_warmup_microbatches]
+ for i in range(num_warmup_microbatches, len(forward_order)):
+ order.append(forward_order[i])
+ order.append(backward_order[i - num_warmup_microbatches])
+ if num_warmup_microbatches > 0:
+ order.extend(backward_order[-num_warmup_microbatches:])
+ return order
+
+
+def get_overlap_moe_expert_parallel_comm_order(order, num_layers_per_chunk, capture_wgrad_graph):
+ """
+ This functions gets the order for overlap_moe_expert_parallel_comm schedule for the original
+ chunk-wise order list. Each chunk is transformered to chunks with only 1 layer so that
+ layers between 2 chunks can now overlap with each other while following the graph order.
+ If capture_wgrad_graph is True, the wgrad backward graph is also added to the order by
+ decreasing the layer id by 0.5.
+
+ Args:
+ order (List[int]): The original chunk-wise order list. Positive values represent forward
+ passes for chunks, negative values represent backward passes. The absolute value
+ indicates the chunk ID (1-indexed).
+ num_layers_per_chunk (List[int]): Number of graphable layers in each chunk. The length
+ of this list equals the number of chunks.
+ capture_wgrad_graph (bool): If True, weight gradient computation graphs are added to the
+ order by appending entries with layer_id - 0.5.
+
+ Returns:
+ Tuple[List[float], List[Optional[List[int]]]]: A tuple containing:
+ - new_order: The layer-wise order list where each chunk is expanded to individual
+ layers. Positive values are forward passes, negative values are backward passes.
+ Values with .5 suffix indicate weight gradient computations.
+ - chunk_id_list: A list parallel to new_order. For forward passes, contains
+ [chunk_id, layer_index_within_chunk]. For backward passes, contains None.
+
+ Example:
+ original_order: [1, 2, -2, 1, -1, -1]
+ num_layers_per_chunk: [1, 2]
+ capture_wgrad_graph=True:
+ new_order: [1, 2, 3, 1, -3, -3.5, -2, -2.5, -1, -1.5, -1, -1.5]
+ chunk_id_list: [[0, 0], [1, 0], [1, 1], [0, 0], None,
+ None, None, None, None, None, None, None]
+ capture_wgrad_graph=False:
+ new_order: [1, 2, 3, 1, -3, -2, -1, -1]
+ chunk_id_list: [[0, 0], [1, 0], [1, 1], [0, 0], None, None, None, None]
+ """
+
+ def _add_order(new_order, chunk_id_list, c_id, layer_id, is_wgrad=False, index=None):
+ if is_wgrad:
+ new_order.append(layer_id - 0.5)
+ else:
+ new_order.append(layer_id)
+ if c_id > 0:
+ chunk_id_list.append([abs(c_id) - 1, index])
+ else:
+ chunk_id_list.append(None)
+
+ new_order = []
+ chunk_id_list = []
+ add_order = partial(_add_order, new_order, chunk_id_list)
+ first_backward_idx, last_forward_idx = None, None
+ for idx, c_id in enumerate(order):
+ if first_backward_idx is None and c_id < 0:
+ first_backward_idx = idx
+ if c_id > 0:
+ last_forward_idx = idx
+
+ def get_layer_range(c_id):
+ num_layers = num_layers_per_chunk[abs(c_id) - 1]
+ num_layers_previous_chunks = sum(num_layers_per_chunk[: abs(c_id) - 1])
+ if c_id > 0:
+ return list(
+ range(num_layers_previous_chunks + 1, num_layers_previous_chunks + num_layers + 1)
+ )
+ return list(range(-num_layers_previous_chunks - num_layers, -num_layers_previous_chunks))
+
+ # warmup stage
+ for c_id in order[:first_backward_idx]:
+ layer_range = get_layer_range(c_id)
+ new_order += layer_range
+ chunk_id_list.extend([abs(c_id) - 1, i] for i in range(len(layer_range)))
+
+ # 1f1b overlap stage
+ if first_backward_idx < last_forward_idx:
+ for c_id_b, c_id_f in zip(
+ order[first_backward_idx : last_forward_idx + 1 : 2],
+ order[first_backward_idx + 1 : last_forward_idx + 1 : 2],
+ ):
+ layer_range_f = get_layer_range(c_id_f)
+ layer_range_b = get_layer_range(c_id_b)
+ index = 0
+ for l_b, l_f in zip_longest(layer_range_b, layer_range_f, fillvalue=0):
+ # always forward graph before backward graph
+ if l_f != 0:
+ add_order(c_id_f, l_f, index=index)
+ if l_b != 0:
+ add_order(c_id_b, l_b)
+ if capture_wgrad_graph and index < len(layer_range_b) - 1:
+ add_order(c_id_b, l_b, is_wgrad=True)
+ index += 1
+ # last wgrad backward
+ if capture_wgrad_graph and layer_range_b:
+ add_order(c_id_b, layer_range_b[-1], is_wgrad=True)
+
+ # cool down stage, backward graphs only
+ for c_id in order[last_forward_idx + 1 :]:
+ for l_b in get_layer_range(c_id):
+ add_order(c_id, l_b)
+ if capture_wgrad_graph:
+ add_order(c_id, l_b, is_wgrad=True)
+
+ return new_order, chunk_id_list
+
+
def forward_backward_pipelining_with_interleaving(
*,
forward_step_func,
@@ -1047,7 +1180,15 @@ def enable_grad_sync():
model_type = get_model_type(model[0])
- tensor_shape = [seq_length, micro_batch_size, config.hidden_size]
+ # Determine hidden dimension for P2P communication
+ # For hyper connections with multiple PP stages, use n-stream dimension
+ hidden_dim = config.hidden_size
+ if getattr(config, 'enable_hyper_connections', False) and pipeline_parallel_size > 1:
+ # For interleaved PP with hyper connections, all intermediate communications use n-stream
+ # Note: This is a simplified approach - proper VPP support may need more complex logic
+ hidden_dim = config.hidden_size * getattr(config, 'num_residual_streams', 1)
+
+ tensor_shape = [seq_length, micro_batch_size, hidden_dim]
tensor_shape[0] = tensor_shape[0] // cp_group.size()
if config.sequence_parallel:
tensor_shape[0] = tensor_shape[0] // tp_group.size()
@@ -1980,9 +2121,19 @@ def get_tensor_shapes(
config,
tp_group: Optional[torch.distributed.ProcessGroup] = None,
cp_group: Optional[torch.distributed.ProcessGroup] = None,
+ pp_group: Optional[torch.distributed.ProcessGroup] = None,
+ is_recv: bool = True,
):
"""Determine tensor shapes for pipeline communication.
+ For hyper connections (mHC), intermediate pipeline stages communicate n-stream tensors
+ with dimension hidden_size * num_residual_streams.
+
+ Args:
+ is_recv: If True, compute shape for receiving; if False, for sending.
+ This matters for hyper connections where first/last stages have different
+ send/recv dimensions.
+
Returns [()] for variable_seq_lengths mode (shapes exchanged dynamically),
or computed shapes for fixed sequence length mode.
"""
@@ -2000,7 +2151,27 @@ def get_tensor_shapes(
if config.sequence_parallel:
effective_seq_length = effective_seq_length // tp_group.size()
- tensor_shapes.append((effective_seq_length, micro_batch_size, config.hidden_size))
+ # Determine hidden dimension based on hyper connections and pipeline stage
+ hidden_size = config.hidden_size
+ # TODO: make this more robust, including flexible VPP layout
+ if getattr(config, 'enable_hyper_connections', False) and pp_group is not None:
+ pp_rank = pp_group.rank()
+ pp_size = pp_group.size()
+ # For hyper connections:
+ # - recv: stages with rank > 0 receive n-stream (n*C) from previous stage
+ # - send: stages with rank < pp_size-1 send n-stream (n*C) to next stage
+ use_nstream = False
+ if is_recv and pp_rank > 0:
+ # Receiving from previous stage (which sends n*C)
+ use_nstream = True
+ elif not is_recv and pp_rank < pp_size - 1:
+ # Sending to next stage (send n*C)
+ use_nstream = True
+
+ if use_nstream:
+ hidden_size = hidden_size * getattr(config, 'num_residual_streams', 1)
+
+ tensor_shapes.append((effective_seq_length, micro_batch_size, hidden_size))
return tensor_shapes
@@ -2168,6 +2339,8 @@ def enable_grad_sync():
config=config,
tp_group=tp_group,
cp_group=cp_group,
+ pp_group=getattr(p2p_communicator, "pp_group", None),
+ is_recv=True,
)
send_tensor_shapes = get_tensor_shapes(
seq_length=seq_length,
@@ -2176,6 +2349,8 @@ def enable_grad_sync():
config=config,
tp_group=tp_group,
cp_group=cp_group,
+ pp_group=getattr(p2p_communicator, "pp_group", None),
+ is_recv=False,
)
if adjust_tensor_shapes_fn is not None:
recv_tensor_shapes, send_tensor_shapes = adjust_tensor_shapes_fn(
diff --git a/megatron/core/safe_globals.py b/megatron/core/safe_globals.py
index 8bcfe788f60..f9f9171cd21 100755
--- a/megatron/core/safe_globals.py
+++ b/megatron/core/safe_globals.py
@@ -33,6 +33,7 @@
RerunState,
BytesIO,
Signals,
+ torch._C.Generator, # Needed for torch format ckpt loading after weights_only default change
]
diff --git a/megatron/core/ssm/gated_delta_net.py b/megatron/core/ssm/gated_delta_net.py
index e23599689bb..7b308952e1d 100644
--- a/megatron/core/ssm/gated_delta_net.py
+++ b/megatron/core/ssm/gated_delta_net.py
@@ -21,6 +21,12 @@
from megatron.core.jit import jit_fuser
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.process_groups_config import ProcessGroupCollection
+from megatron.core.ssm.mamba_context_parallel import (
+ _all_to_all_cp2hp,
+ _all_to_all_hp2cp,
+ _redo_attention_load_balancing,
+ _undo_attention_load_balancing,
+)
from megatron.core.tensor_parallel import get_cuda_rng_tracker
from megatron.core.transformer import TransformerConfig
from megatron.core.transformer.identity_op import IdentityOp
@@ -33,25 +39,19 @@
)
from megatron.core.utils import deprecate_inference_params, nvtx_range_pop, nvtx_range_push
-# TODO: Implement GatedDeltaNetContextParallel
-# from .gated_delta_net_context_parallel import GatedDeltaNetContextParallel
-
try:
+ from fla.modules.convolution import causal_conv1d
from fla.modules.l2norm import l2norm
from fla.ops.gated_delta_rule import chunk_gated_delta_rule
HAVE_FLA = True
except ImportError:
+ causal_conv1d = None
+ l2norm = None
chunk_gated_delta_rule = None
HAVE_FLA = False
-try:
- from causal_conv1d import causal_conv1d_fn
-except ImportError:
- causal_conv1d_fn = None
- causal_conv1d_update = None
-
logger = logging.getLogger(__name__)
@@ -85,6 +85,7 @@ def __init__(
use_qk_l2norm: bool = True,
A_init_range: Tuple[float, float] = (1, 16),
pg_collection: ProcessGroupCollection = None,
+ **kwargs,
):
"""
Args:
@@ -117,6 +118,7 @@ def __init__(
self.use_qk_l2norm = use_qk_l2norm
assert pg_collection is not None, "pg_collection must be provided for GatedDeltaNet"
self.pg_collection = pg_collection
+ self.cp_size = self.pg_collection.cp.size()
self.tp_size = self.pg_collection.tp.size()
self.sp_size = self.tp_size if config.sequence_parallel else 1
@@ -132,6 +134,8 @@ def __init__(
self.num_value_heads = config.linear_num_value_heads
self.qk_dim = self.key_head_dim * self.num_key_heads
self.v_dim = self.value_head_dim * self.num_value_heads
+ self.qk_dim_local_tp = self.qk_dim // self.tp_size
+ self.v_dim_local_tp = self.v_dim // self.tp_size
# Input projection (hidden_states -> q, k, v, gate, beta, alpha)
# TODO: for now, output gate is forced for GDN.
@@ -202,6 +206,11 @@ def __init__(
setattr(self.A_log, "tensor_model_parallel", True)
setattr(self.A_log, "partition_dim", 0)
+ if self.config.deterministic_mode:
+ self.gated_delta_rule = torch_chunk_gated_delta_rule
+ else:
+ self.gated_delta_rule = chunk_gated_delta_rule
+
# Output layernorm before projection
self.out_norm = build_module(
submodules.out_norm,
@@ -224,8 +233,6 @@ def __init__(
tp_group=self.pg_collection.tp,
)
- # TODO: support CP
-
self.reset_parameters()
def reset_parameters(self):
@@ -254,9 +261,7 @@ def forward(
self,
hidden_states: Tensor,
attention_mask: Tensor,
- key_value_states: Optional[Tensor] = None,
inference_context: Optional[BaseInferenceContext] = None,
- attention_bias: Optional[Tensor] = None,
packed_seq_params: Optional[PackedSeqParams] = None,
sequence_len_offset: Optional[int] = None,
*,
@@ -269,10 +274,8 @@ def forward(
Args:
hidden_states (Tensor): Hidden states.
attention_mask (Tensor): Attention mask.
- key_value_states (Optional[Tensor]): Key/value states (for cross attention).
inference_context (Optional[BaseInferenceContext]): Inference context that manages
KV cache.
- attention_bias (Optional[Tensor]): Attention bias.
packed_seq_params (Optional[PackedSeqparams]): Parameters used for THD format.
sequence_len_offset (Optional[int]): Sequence length offset used for
inference CUDA graphs.
@@ -286,7 +289,7 @@ def forward(
inference_context = deprecate_inference_params(inference_context, inference_params)
seq_len, batch, _ = hidden_states.shape
- seq_len = seq_len * self.sp_size
+ seq_len = seq_len * self.sp_size * self.cp_size
if inference_context is not None:
assert (
@@ -297,14 +300,76 @@ def forward(
raise NotImplementedError("GDN does not support inference for now.")
if packed_seq_params is not None:
- # TODO: support packed sequence
- raise NotImplementedError("GDN does not support packed sequence for now.")
+ assert batch == 1, "Packed sequence expects batch dimension to be 1"
+ assert (
+ not self.config.deterministic_mode
+ ), "Packed sequence does not support deterministic mode."
+
+ # Prefer cu_seqlens_q_padded if available, otherwise use cu_seqlens_q
+ if packed_seq_params.cu_seqlens_q_padded is not None:
+ cu_seqlens_q = packed_seq_params.cu_seqlens_q_padded
+ else:
+ cu_seqlens_q = packed_seq_params.cu_seqlens_q
+ # Prefer cu_seqlens_kv_padded if available, otherwise use cu_seqlens_kv
+ if packed_seq_params.cu_seqlens_kv_padded is not None:
+ cu_seqlens_kv = packed_seq_params.cu_seqlens_kv_padded
+ else:
+ cu_seqlens_kv = packed_seq_params.cu_seqlens_kv
+ assert torch.equal(cu_seqlens_q, cu_seqlens_kv), (
+ "Currently only support cu_seqlens_q equals to cu_seqlens_kv, "
+ f"but got {cu_seqlens_q=} and {cu_seqlens_kv=}"
+ )
+ num_packed_seqs = cu_seqlens_q.shape[0] - 1
+ assert num_packed_seqs > 0, (
+ "Number of packed sequences must be greater than 0, "
+ f"but got {cu_seqlens_q=} and {cu_seqlens_kv=}"
+ )
+ else:
+ cu_seqlens_q = None
+ cu_seqlens_kv = None
# Input projection
nvtx_range_push(suffix="in_proj")
qkvzba, _ = self.in_proj(hidden_states)
nvtx_range_pop(suffix="in_proj")
+ # CP All to All: CP to HP
+ if packed_seq_params is not None:
+ unpacked_qkvzba = _unpack_sequence(qkvzba, cu_seqlens_q // self.cp_size, dim=0)
+ outputs = []
+ for qkvzba_i in unpacked_qkvzba:
+ qkvzba_i = tensor_a2a_cp2hp(
+ qkvzba_i,
+ seq_dim=0,
+ head_dim=-1,
+ cp_group=self.pg_collection.cp,
+ split_sections=[
+ self.qk_dim_local_tp,
+ self.qk_dim_local_tp,
+ self.v_dim_local_tp,
+ self.v_dim_local_tp,
+ self.num_value_heads // self.tp_size,
+ self.num_value_heads // self.tp_size,
+ ],
+ )
+ outputs.append(qkvzba_i)
+ qkvzba = torch.cat(outputs, dim=0)
+ else:
+ qkvzba = tensor_a2a_cp2hp(
+ qkvzba,
+ seq_dim=0,
+ head_dim=-1,
+ cp_group=self.pg_collection.cp,
+ split_sections=[
+ self.qk_dim_local_tp,
+ self.qk_dim_local_tp,
+ self.v_dim_local_tp,
+ self.v_dim_local_tp,
+ self.num_value_heads // self.tp_size,
+ self.num_value_heads // self.tp_size,
+ ],
+ )
+
# Transpose: s b x --> b s x
# From sbhd to bshd format
qkvzba = qkvzba.transpose(0, 1)
@@ -313,10 +378,10 @@ def forward(
qkv, gate, beta, alpha = torch.split(
qkvzba,
[
- (self.qk_dim * 2 + self.v_dim) // self.tp_size,
- self.v_dim // self.tp_size,
- self.num_value_heads // self.tp_size,
- self.num_value_heads // self.tp_size,
+ (self.qk_dim_local_tp * 2 + self.v_dim_local_tp) // self.cp_size,
+ self.v_dim_local_tp // self.cp_size,
+ self.num_value_heads // self.tp_size // self.cp_size,
+ self.num_value_heads // self.tp_size // self.cp_size,
],
dim=-1,
)
@@ -325,74 +390,83 @@ def forward(
alpha = alpha.reshape(batch, seq_len, -1)
# Convolution on qkv
- qkv = qkv.transpose(1, 2).contiguous() # b, s, d -> b, d, s
nvtx_range_push(suffix="conv1d")
- if (causal_conv1d_fn is None) or self.config.deterministic_mode:
- qkv = self.act_fn(self.conv1d(qkv)[..., :seq_len])
+ seq_len = qkv.shape[1]
+ qkv_channels_split_sections = [
+ self.qk_dim_local_tp,
+ self.qk_dim_local_tp,
+ self.v_dim_local_tp,
+ ]
+ conv1d_weight = get_parameter_local_cp(
+ self.conv1d.weight,
+ dim=0,
+ cp_group=self.pg_collection.cp,
+ split_sections=qkv_channels_split_sections,
+ )
+ conv1d_bias = (
+ get_parameter_local_cp(
+ self.conv1d.bias,
+ dim=0,
+ cp_group=self.pg_collection.cp,
+ split_sections=qkv_channels_split_sections,
+ )
+ if self.conv_bias
+ else None
+ )
+ if self.config.deterministic_mode:
+ qkv = qkv.transpose(1, 2).contiguous() # b, s, d -> b, d, s
+ conv_out = F.conv1d(
+ input=qkv, # Torch-native only accept [b, d, s] format input
+ weight=conv1d_weight,
+ bias=conv1d_bias,
+ stride=self.conv1d.stride,
+ padding=self.conv1d.padding,
+ dilation=self.conv1d.dilation,
+ groups=self.conv_dim_local_tp // self.cp_size,
+ )
+ qkv = self.act_fn(conv_out[..., :seq_len])
+ qkv = qkv.transpose(1, 2) # b, d, s -> b, s, d
else:
assert self.activation in ["silu", "swish"]
- qkv = causal_conv1d_fn(
- x=qkv,
- weight=self.conv1d.weight.squeeze(1), # d, 1, w -> d, w
- bias=self.conv1d.bias,
+ qkv, _ = causal_conv1d(
+ x=qkv, # FLA conv1d accepts [b, s, d] format input
+ weight=conv1d_weight.squeeze(1), # d, 1, w -> d, w
+ bias=conv1d_bias,
activation=self.activation,
+ initial_state=None,
+ output_final_state=False,
+ cu_seqlens=cu_seqlens_q,
)
nvtx_range_pop(suffix="conv1d")
- # Split qkv into query, key, and value
- qkv = qkv.transpose(1, 2) # b, d, s -> b, s, d
- query, key, value = torch.split(
- qkv,
- [self.qk_dim // self.tp_size, self.qk_dim // self.tp_size, self.v_dim // self.tp_size],
- dim=-1,
- )
- query = query.reshape(batch, seq_len, -1, self.key_head_dim)
- key = key.reshape(batch, seq_len, -1, self.key_head_dim)
- value = value.reshape(batch, seq_len, -1, self.value_head_dim)
- # Apply L2 norm to query and key
- if self.use_qk_l2norm:
- query = l2norm(query.contiguous())
- key = l2norm(key.contiguous())
- if self.num_value_heads // self.num_key_heads > 1:
- query = query.repeat_interleave(self.num_value_heads // self.num_key_heads, dim=2)
- key = key.repeat_interleave(self.num_value_heads // self.num_key_heads, dim=2)
- # Make contiguous
- query = query.contiguous()
- key = key.contiguous()
- value = value.contiguous()
- gate = gate.contiguous()
- beta = beta.contiguous()
- alpha = alpha.contiguous()
+ # Prepare QKV tensors (split, reshape, L2 norm, repeat_interleave, contiguous)
+ nvtx_range_push(suffix="prepare_qkv_for_gated_delta_rule")
+ query, key, value, gate, beta, alpha = self._prepare_qkv_for_gated_delta_rule(
+ qkv, gate, beta, alpha, batch, seq_len
+ )
+ nvtx_range_pop(suffix="prepare_qkv_for_gated_delta_rule")
# Calculate g and beta
nvtx_range_push(suffix="g_and_beta")
- g = -self.A_log.exp() * F.softplus(alpha.float() + self.dt_bias) # In fp32
- beta = beta.sigmoid()
+ A_log_local_cp = get_parameter_local_cp(self.A_log, dim=0, cp_group=self.pg_collection.cp)
+ dt_bias_local_cp = get_parameter_local_cp(
+ self.dt_bias, dim=0, cp_group=self.pg_collection.cp
+ )
+ g, beta = self._compute_g_and_beta(A_log_local_cp, dt_bias_local_cp, alpha, beta)
nvtx_range_pop(suffix="g_and_beta")
nvtx_range_push(suffix="gated_delta_rule")
- if self.config.deterministic_mode:
- core_attn_out, last_recurrent_state = torch_chunk_gated_delta_rule(
- query,
- key,
- value,
- g=g,
- beta=beta,
- initial_state=None,
- output_final_state=False,
- use_qk_l2norm_in_kernel=False,
- )
- else:
- core_attn_out, last_recurrent_state = chunk_gated_delta_rule(
- query,
- key,
- value,
- g=g,
- beta=beta,
- initial_state=None,
- output_final_state=False,
- use_qk_l2norm_in_kernel=False,
- )
+ core_attn_out, last_recurrent_state = self.gated_delta_rule(
+ query,
+ key,
+ value,
+ g=g,
+ beta=beta,
+ initial_state=None,
+ output_final_state=False,
+ use_qk_l2norm_in_kernel=False,
+ cu_seqlens=cu_seqlens_q,
+ )
nvtx_range_pop(suffix="gated_delta_rule")
# RMSNorm
@@ -405,6 +479,21 @@ def forward(
norm_out = norm_out.reshape(batch, seq_len, -1)
norm_out = norm_out.transpose(0, 1).contiguous()
+ # CP all to all: HP to CP
+ if packed_seq_params is not None:
+ unpacked_norm_out = _unpack_sequence(norm_out, cu_seqlens_q, dim=0)
+ outputs = []
+ for norm_out_i in unpacked_norm_out:
+ norm_out_i = tensor_a2a_hp2cp(
+ norm_out_i, seq_dim=0, head_dim=-1, cp_group=self.pg_collection.cp
+ )
+ outputs.append(norm_out_i)
+ norm_out = torch.cat(outputs, dim=0)
+ else:
+ norm_out = tensor_a2a_hp2cp(
+ norm_out, seq_dim=0, head_dim=-1, cp_group=self.pg_collection.cp
+ )
+
# Output projection
nvtx_range_push(suffix="out_proj")
out, out_bias = self.out_proj(norm_out)
@@ -424,6 +513,57 @@ def _apply_gated_norm(self, x, gate):
y = y.to(x_dtype)
return y
+ @jit_fuser
+ def _prepare_qkv_for_gated_delta_rule(self, qkv, gate, beta, alpha, batch, seq_len):
+ """
+ Prepare query, key, value, gate, beta, alpha tensors for gated delta rule.
+ Fuses split, reshape, L2 norm, repeat_interleave, and contiguous operations.
+ """
+ # Split qkv into query_key and value
+ query_key, value = torch.split(
+ qkv,
+ [2 * self.qk_dim_local_tp // self.cp_size, self.v_dim_local_tp // self.cp_size],
+ dim=-1,
+ )
+
+ # Reshape query_key and value
+ query_key = query_key.reshape(batch, seq_len, -1, self.key_head_dim)
+ value = value.reshape(batch, seq_len, -1, self.value_head_dim)
+
+ # Apply L2 norm to query and key
+ if self.use_qk_l2norm:
+ query_key = l2norm(query_key.contiguous())
+
+ # Split query and key
+ split_size = self.qk_dim_local_tp // self.key_head_dim // self.cp_size
+ query, key = torch.split(query_key, [split_size, split_size], dim=2)
+
+ # Expand query and key if needed (grouped query attention)
+ if self.num_value_heads // self.num_key_heads > 1:
+ repeat_factor = self.num_value_heads // self.num_key_heads
+ query = query.repeat_interleave(repeat_factor, dim=2)
+ key = key.repeat_interleave(repeat_factor, dim=2)
+
+ # Make all tensors contiguous
+ query = query.contiguous()
+ key = key.contiguous()
+ value = value.contiguous()
+ gate = gate.contiguous()
+ beta = beta.contiguous()
+ alpha = alpha.contiguous()
+
+ return query, key, value, gate, beta, alpha
+
+ @jit_fuser
+ def _compute_g_and_beta(self, A_log_local_cp, dt_bias_local_cp, alpha, beta):
+ """
+ Compute g (decay) and beta (sigmoid) for gated delta rule.
+ Fuses exp, softplus, mul, neg, and sigmoid operations.
+ """
+ g = -A_log_local_cp.exp() * F.softplus(alpha.float() + dt_bias_local_cp) # In fp32
+ beta = beta.sigmoid()
+ return g, beta
+
def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None, tp_group=None):
"""Provide a sharded state dictionary for distributed checkpointing."""
# Guard for cases metadata is not provided
@@ -478,10 +618,10 @@ def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None, tp_gr
sharded_state_dict[f"{prefix}in_proj.weight"] = _split_tensor_factory(
sharded_state_dict[f"{prefix}in_proj.weight"],
[
- self.qk_dim // self.tp_size,
- self.qk_dim // self.tp_size,
- self.v_dim // self.tp_size,
- self.v_dim // self.tp_size,
+ self.qk_dim_local_tp,
+ self.qk_dim_local_tp,
+ self.v_dim_local_tp,
+ self.v_dim_local_tp,
self.num_value_heads // self.tp_size,
self.num_value_heads // self.tp_size,
],
@@ -501,11 +641,7 @@ def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None, tp_gr
for conv_layer_name in conv_layer_name_list:
sharded_state_dict[f"{prefix}{conv_layer_name}"] = _split_tensor_factory(
sharded_state_dict[f"{prefix}{conv_layer_name}"],
- [
- self.qk_dim // self.tp_size,
- self.qk_dim // self.tp_size,
- self.v_dim // self.tp_size,
- ],
+ [self.qk_dim_local_tp, self.qk_dim_local_tp, self.v_dim_local_tp],
["query", "key", "value"],
0,
)
@@ -526,6 +662,20 @@ def _backward_out_proj(self):
self.out_proj.backward_dw()
+def _unpack_sequence(x, cu_seqlens, dim=1):
+ unpacked_x = []
+ num_seqs = cu_seqlens.shape[0] - 1
+ for i in range(num_seqs):
+ idx_start = cu_seqlens[i].item()
+ idx_end = cu_seqlens[i + 1].item()
+ chunked_index = [slice(None)] * dim + [slice(idx_start, idx_end)]
+ unpacked_x.append(x[tuple(chunked_index)])
+ return unpacked_x
+
+
+####################
+# Sharded state dict utilities
+####################
def _split_tensor_factory(
orig_sh_ten: ShardedTensor, split_sections: List[int], split_names: List[str], split_dim: int
) -> ShardedTensorFactory:
@@ -586,6 +736,184 @@ def sh_ten_merge_fn(sub_state_dict):
)
+####################
+# Context parallel utilities
+####################
+def get_parameter_local_cp(
+ param: torch.Tensor,
+ dim: int,
+ cp_group: torch.distributed.ProcessGroup,
+ split_sections: Optional[List[int]] = None,
+) -> torch.Tensor:
+ """Get the local parameter for the current context parallel rank.
+
+ Args:
+ param (torch.Tensor): The entire parameter to get the local parameter for.
+ dim (int): The dimension to split the parameter along. Usually the dimension of head.
+ cp_group (torch.distributed.ProcessGroup): The context parallel group.
+ split_sections (Optional[List[int]]): If not None,
+ first split the parameter along the dimension dim into sections,
+ then get the local hidden parallel weights separately,
+ finally concatenate the local hidden parallel weights along the dimension dim.
+
+ Returns:
+ torch.Tensor: The local parameter for the current context parallel rank.
+ """
+
+ cp_size = cp_group.size()
+ cp_rank = cp_group.rank()
+
+ # No need to split if CP size is 1.
+ if cp_size == 1:
+ return param
+
+ # Split first if needed.
+ if split_sections is not None:
+ inputs = torch.split(param, split_sections, dim=dim)
+ outputs = []
+ for p in inputs:
+ p = get_parameter_local_cp(p, dim, cp_group)
+ outputs.append(p)
+ return torch.cat(outputs, dim=dim)
+
+ # Slice the parameter.
+ slices = [slice(None)] * param.dim()
+ dim_size = param.size(dim=dim)
+ slices[dim] = slice(cp_rank * dim_size // cp_size, (cp_rank + 1) * dim_size // cp_size)
+ param = param[slices]
+ return param
+
+
+def tensor_a2a_cp2hp(
+ tensor: torch.Tensor,
+ seq_dim: int,
+ head_dim: int,
+ cp_group: torch.distributed.ProcessGroup,
+ split_sections: Optional[List[int]] = None,
+ undo_attention_load_balancing: bool = True,
+):
+ """All-to-all context parallel to hidden parallel.
+
+ Args:
+ tensor (torch.Tensor): The tensor to all-to-all.
+ Currently only supports a (seq_len, batch, head_dim) shaped tensor.
+ seq_dim (int): The dimension of sequence length. Currently only supports seq_dim == 0.
+ head_dim (int): The dimension of head. Currently only supports head_dim == -1 or 2.
+ cp_group (torch.distributed.ProcessGroup): The context parallel group.
+ split_sections (Optional[List[int]]): If not None, split the tensor along the dimension
+ head_dim into sections first, then do all-to-all for each section separately,
+ finally concatenate the separated tensors along the dimension head_dim.
+ undo_attention_load_balancing (bool): Whether to undo the attention load balancing of CP.
+
+ Returns:
+ torch.Tensor: The all-to-all tensor.
+ """
+
+ cp_size = cp_group.size()
+
+ # No need to all-to-all if CP size is 1.
+ if cp_size == 1:
+ return tensor
+
+ # Limitations of mamba_context_parallel._all_to_all_cp2hp.
+ assert seq_dim == 0, f"tensor_a2a_cp2hp only supports seq_dim == 0 for now, but got {seq_dim=}"
+ assert (
+ head_dim == -1 or head_dim == 2
+ ), f"tensor_a2a_cp2hp only supports head_dim == -1 or 2 for now, but got {head_dim=}"
+ assert (
+ tensor.dim() == 3
+ ), f"tensor_a2a_cp2hp only supports 3-d input tensor for now, but got {tensor.dim()=}"
+
+ # Split first if needed.
+ if split_sections is not None:
+ inputs = torch.split(tensor, split_sections, dim=head_dim)
+ outputs = []
+ for x in inputs:
+ x = tensor_a2a_cp2hp(
+ x,
+ seq_dim=seq_dim,
+ head_dim=head_dim,
+ cp_group=cp_group,
+ undo_attention_load_balancing=False,
+ )
+ outputs.append(x)
+ tensor = torch.cat(outputs, dim=head_dim)
+ else:
+ tensor = _all_to_all_cp2hp(tensor, cp_group)
+
+ # Undo attention load balancing last if needed.
+ if undo_attention_load_balancing:
+ tensor = _undo_attention_load_balancing(tensor, cp_size)
+ return tensor
+
+
+def tensor_a2a_hp2cp(
+ tensor: torch.Tensor,
+ seq_dim: int,
+ head_dim: int,
+ cp_group: torch.distributed.ProcessGroup,
+ split_sections: Optional[List[int]] = None,
+ redo_attention_load_balancing: bool = True,
+):
+ """All-to-all hidden parallel to context parallel.
+
+ Args:
+ tensor (torch.Tensor): The tensor to all-to-all.
+ Currently only supports a (seq_len, batch, head_dim) shaped tensor.
+ seq_dim (int): The dimension of sequence length. Currently only supports seq_dim == 0.
+ head_dim (int): The dimension of head. Currently only supports head_dim == -1 or 2.
+ cp_group (torch.distributed.ProcessGroup): The context parallel group.
+ split_sections (Optional[List[int]]): If not None, first split the tensor along the
+ dimension head_dim into sections, then do all-to-all for each section separately,
+ finally concatenate the separated tensors along the dimension head_dim.
+ redo_attention_load_balancing (bool): Whether to redo the attention load balancing of CP.
+
+ Returns:
+ torch.Tensor: The all-to-all tensor.
+ """
+
+ cp_size = cp_group.size()
+
+ # No need to all-to-all if CP size is 1.
+ if cp_size == 1:
+ return tensor
+
+ # Limitations of mamba_context_parallel._all_to_all_hp2cp.
+ assert seq_dim == 0, f"tensor_a2a_hp2cp only supports seq_dim == 0 for now, but got {seq_dim=}"
+ assert (
+ head_dim == -1 or head_dim == 2
+ ), f"tensor_a2a_hp2cp only supports head_dim == -1 or 2 for now, but got {head_dim=}"
+ assert (
+ tensor.dim() == 3
+ ), f"tensor_a2a_hp2cp only supports 3-d input tensor for now, but got {tensor.dim()=}"
+
+ # Redo attention load balancing first if needed.
+ if redo_attention_load_balancing:
+ tensor = _redo_attention_load_balancing(tensor, cp_size)
+
+ # Split first if needed.
+ if split_sections is not None:
+ inputs = torch.split(tensor, split_sections, dim=head_dim)
+ outputs = []
+ for x in inputs:
+ x = tensor_a2a_hp2cp(
+ x,
+ seq_dim=seq_dim,
+ head_dim=head_dim,
+ cp_group=cp_group,
+ redo_attention_load_balancing=False,
+ )
+ outputs.append(x)
+ tensor = torch.cat(outputs, dim=head_dim)
+ else:
+ tensor = _all_to_all_hp2cp(tensor, cp_group)
+
+ return tensor
+
+
+####################
+# Torch native gated delta rule
+####################
def torch_chunk_gated_delta_rule(
query,
key,
@@ -596,6 +924,7 @@ def torch_chunk_gated_delta_rule(
initial_state=None,
output_final_state=False,
use_qk_l2norm_in_kernel=False,
+ cu_seqlens=None,
):
# pylint: disable=line-too-long
'''
@@ -605,6 +934,10 @@ def torch_chunk_gated_delta_rule(
Reference: https://github.com/huggingface/transformers/blob/144c8ce2809a2e21914017652700e1ecb450501e/src/transformers/models/qwen3_next/modeling_qwen3_next.py#L470-L547
'''
+ assert (
+ cu_seqlens is None
+ ), "cu_seqlens is not supported for torch_chunk_gated_delta_rule for now."
+
initial_dtype = query.dtype
if use_qk_l2norm_in_kernel:
query = l2norm(query, dim=-1, eps=1e-6)
diff --git a/megatron/core/ssm/mamba_mixer.py b/megatron/core/ssm/mamba_mixer.py
index 314790f6a5d..3b5774a740e 100644
--- a/megatron/core/ssm/mamba_mixer.py
+++ b/megatron/core/ssm/mamba_mixer.py
@@ -320,18 +320,26 @@ def __init__(
self.act = nn.SiLU()
with get_cuda_rng_tracker().fork():
- # Initialize dt bias so that F.softplus(dt_bias) is between dt_min and dt_max
- dt = torch.exp(
- torch.rand(
+ if self.config.perform_initialization:
+ # Initialize dt bias so that F.softplus(dt_bias) is between dt_min and dt_max
+ dt = torch.exp(
+ torch.rand(
+ self.nheads_local_tp,
+ device=torch.cuda.current_device(),
+ dtype=config.params_dtype,
+ )
+ * (math.log(dt_max) - math.log(dt_min))
+ + math.log(dt_min)
+ ).clamp(min=dt_init_floor)
+ # Inverse of softplus: https://github.com/pytorch/pytorch/issues/72759
+ inv_dt = dt + torch.log(-torch.expm1(-dt))
+ else:
+ inv_dt = torch.empty(
self.nheads_local_tp,
device=torch.cuda.current_device(),
dtype=config.params_dtype,
)
- * (math.log(dt_max) - math.log(dt_min))
- + math.log(dt_min)
- ).clamp(min=dt_init_floor)
- # Inverse of softplus: https://github.com/pytorch/pytorch/issues/72759
- inv_dt = dt + torch.log(-torch.expm1(-dt))
+
self.dt_bias = nn.Parameter(inv_dt)
setattr(self.dt_bias, "tensor_model_parallel", True)
setattr(self.dt_bias, "partition_dim", 0)
diff --git a/megatron/core/tensor_parallel/layers.py b/megatron/core/tensor_parallel/layers.py
index 666245a9f6f..40c7e712a1d 100644
--- a/megatron/core/tensor_parallel/layers.py
+++ b/megatron/core/tensor_parallel/layers.py
@@ -255,6 +255,10 @@ def __init__(
rank=get_pg_rank(self.tp_group),
world_size=get_pg_size(self.tp_group),
)
+ else:
+ set_tensor_model_parallel_attributes(
+ tensor=self.weight, is_parallel=True, dim=0, stride=1
+ )
else:
self.weight = Parameter(
torch.empty(
@@ -266,6 +270,10 @@ def __init__(
)
if config.perform_initialization:
_initialize_affine_weight_gpu(self.weight, init_method, partition_dim=0, stride=1)
+ else:
+ set_tensor_model_parallel_attributes(
+ tensor=self.weight, is_parallel=True, dim=0, stride=1
+ )
def forward(self, input_):
"""Forward.
@@ -845,6 +853,10 @@ def __init__(
rank=rank,
world_size=world_size,
)
+ else:
+ set_tensor_model_parallel_attributes(
+ tensor=self.weight, is_parallel=True, dim=0, stride=stride
+ )
else:
self.weight = Parameter(
torch.empty(
@@ -862,6 +874,10 @@ def __init__(
stride=stride,
is_expert=self.is_expert,
)
+ else:
+ set_tensor_model_parallel_attributes(
+ tensor=self.weight, is_parallel=True, dim=0, stride=stride
+ )
setattr(self.weight, "allreduce", not (self.is_expert and self.expert_parallel))
else:
@@ -1173,6 +1189,10 @@ def __init__(
rank=rank,
world_size=world_size,
)
+ else:
+ set_tensor_model_parallel_attributes(
+ tensor=self.weight, is_parallel=True, dim=1, stride=stride
+ )
else:
self.weight = Parameter(
torch.empty(
@@ -1190,6 +1210,10 @@ def __init__(
stride=stride,
is_expert=self.is_expert,
)
+ else:
+ set_tensor_model_parallel_attributes(
+ tensor=self.weight, is_parallel=True, dim=1, stride=stride
+ )
setattr(self.weight, "allreduce", not (self.is_expert and self.expert_parallel))
if bias:
diff --git a/megatron/core/tensor_parallel/random.py b/megatron/core/tensor_parallel/random.py
index 92d39ba92ef..4516fe10d88 100644
--- a/megatron/core/tensor_parallel/random.py
+++ b/megatron/core/tensor_parallel/random.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Parts of the code here are adapted from PyTorch
# repo: https://github.com/pytorch/pytorch
@@ -598,7 +598,9 @@ def forward(
@staticmethod
def backward(ctx, *args):
"""Backward pass."""
- if not torch.autograd._is_checkpoint_valid():
+ from megatron.core.transformer.cuda_graphs import is_graph_capturing
+
+ if not torch.autograd._is_checkpoint_valid() and not is_graph_capturing():
raise RuntimeError(
"Checkpointing is not compatible with .grad(), "
"please use .backward() if possible"
@@ -642,10 +644,67 @@ def checkpoint(
return CheckpointFunction.apply(function, distribute_saved_activations, *args)
+def _save_args_to_ctx(ctx, args):
+ """Save mixed tensor/non-tensor arguments into autograd ctx.
+
+ Since save_for_backward only supports tensors, this function separates
+ tensor and non-tensor arguments, saving tensors via save_for_backward
+ and storing non-tensor metadata (indices and values) as ctx attributes.
+
+ Use _load_args_from_ctx to reconstruct the original args.
+ """
+ tensor_args = []
+ non_tensor_entries = []
+
+ for index, arg in enumerate(args):
+ if isinstance(arg, torch.Tensor):
+ tensor_args.append(arg)
+ continue
+ non_tensor_entries.append((index, arg))
+
+ ctx.save_for_backward(*detach_variable(tuple(tensor_args)))
+ ctx._non_tensor_entries = tuple(non_tensor_entries)
+ ctx._total_args_count = len(args)
+
+
+def _load_args_from_ctx(ctx):
+ """Load and reconstruct mixed tensor/non-tensor arguments from autograd ctx.
+
+ This is the inverse of _save_args_to_ctx. It retrieves tensors from
+ ctx.saved_tensors and merges them with stored non-tensor arguments
+ to reconstruct the original args in their original order.
+
+ Returns:
+ tuple of reconstructed arguments in their original order.
+ """
+
+ def _detach_with_grad(tensor):
+ detached = tensor.detach()
+ detached.requires_grad_(tensor.requires_grad)
+ return detached
+
+ tensor_iter = iter(_detach_with_grad(t) for t in ctx.saved_tensors)
+ total_args_count = ctx._total_args_count
+ non_tensor_map = dict(ctx._non_tensor_entries)
+
+ reconstructed_args = []
+ for index in range(total_args_count):
+ if index in non_tensor_map:
+ reconstructed_args.append(non_tensor_map[index])
+ else:
+ reconstructed_args.append(next(tensor_iter))
+ return tuple(reconstructed_args)
+
+
class CheckpointWithoutOutputFunction(torch.autograd.Function):
"""
Checkpoint Function Helper for CheckpointWithoutOutput.
Save context for recompute.
+
+ Handles both tensor and non-tensor arguments:
+ - Tensor arguments are saved via save_for_backward
+ - Non-tensor arguments (int, float, bool, None, etc.) are stored separately
+ in ctx attributes and reconstructed during recomputation
"""
@staticmethod
@@ -668,7 +727,10 @@ def forward(
with torch.no_grad(), fwd_ctx:
outputs = run_function(*args)
- ctx.save_for_backward(*detach_variable(args))
+
+ # Save tensor and non-tensor arguments into ctx for recomputation
+ _save_args_to_ctx(ctx, args)
+
# the CheckpointWithoutOutput object is passed in, then it can access the saved input
# tensors later for recomputation
checkpoint_without_output_obj.ctx = ctx
@@ -685,10 +747,56 @@ def backward(ctx, *args):
torch.autograd.backward(outputs, args)
ctx.outputs = None
ctx.inputs = None
- grads = tuple(inp.grad if isinstance(inp, torch.Tensor) else inp for inp in inputs)
+ grads = tuple(inp.grad if isinstance(inp, torch.Tensor) else None for inp in inputs)
return (None, None) + grads
+class CheckpointManager:
+ """
+ Manages multiple CheckpointWithoutOutput objects within a TransformerBlock's
+ cross-layer recomputations, enabling unified recomputation during the backward pass.
+ This is particularly useful for scenarios where multiple checkpoint operations have
+ sequential dependencies (i.e., the output of one checkpoint is the input of the next).
+
+ Usage:
+ ckpt_manager = CheckpointManager()
+ ckpt_function = CheckpointWithoutOutput(ckpt_manager=ckpt_manager)
+ ckpt_function.checkpoint(run_function, *args)
+ # other checkpointed operations
+ ckpt_manager.discard_all_outputs_and_register_unified_recompute(final_output)
+ """
+
+ def __init__(self):
+ self.checkpoints = []
+ # Set by TransformerBlock before each layer forward.
+ # When True, the layer should keep block-boundary output uncheckpointed.
+ self.is_last_layer_in_recompute_block = False
+
+ def add_checkpoint(self, ckpt):
+ """Add a checkpoint to the manager."""
+ if not isinstance(ckpt, CheckpointWithoutOutput):
+ raise TypeError("Expected CheckpointWithoutOutput object")
+ if ckpt.outputs is None:
+ raise ValueError("CheckpointWithoutOutput must call checkpoint() before adding")
+ self.checkpoints.append(ckpt)
+
+ def discard_all_outputs_and_register_unified_recompute(self, hook_tensor):
+ """Discard all checkpoint outputs to save memory and register unified recompute hook."""
+ for ckpt in self.checkpoints:
+ for output in ckpt.outputs:
+ output.untyped_storage().resize_(0)
+
+ # Register unified recompute hook
+ if hook_tensor.requires_grad:
+ hook_tensor.register_hook(self._unified_recompute_hook)
+
+ def _unified_recompute_hook(self, grad_output):
+ for ckpt in self.checkpoints:
+ # Call _recompute for each checkpoint in forward order
+ # The _recompute method will restore the output tensor storage
+ ckpt._recompute(None)
+
+
class CheckpointWithoutOutput(object):
"""
Checkpoint a model or part of the model and release the output.
@@ -703,8 +811,19 @@ class CheckpointWithoutOutput(object):
discarded output tensors are directly saved in the following modules for backward computation.
"""
- def __init__(self, fp8=False):
- self.fp8 = fp8 is not None
+ def __init__(self, fp8=False, ckpt_manager=None):
+ """
+ Initialize CheckpointWithoutOutput.
+
+ Args:
+ fp8: Whether to use FP8 mode. Defaults to False.
+ ckpt_manager: Optional CheckpointManager instance. When provided,
+ checkpoint() will auto-register to the manager, and
+ discard_output_and_register_recompute() will only discard
+ output without registering individual hooks.
+ """
+ self.fp8 = bool(fp8)
+ self.ckpt_manager = ckpt_manager
self.run_function = None
self.fwd_cpu_rng_state = None
self.fwd_cuda_rng_state = None
@@ -713,7 +832,12 @@ def __init__(self, fp8=False):
self.outputs = None
def checkpoint(self, run_function: Callable[[Unpack[_Ts]], _R], *args: Unpack[_Ts]) -> _R:
- """Checkpoint function."""
+ """
+ Checkpoint function.
+
+ If ckpt_manager was provided during initialization, this checkpoint
+ will be automatically registered to the manager after execution.
+ """
# If in cuda graph warmup, disable checkpointing, as 'discard_output_and_register_recompute'
# may be called in a separate graph warmup.
@@ -730,6 +854,11 @@ def checkpoint(self, run_function: Callable[[Unpack[_Ts]], _R], *args: Unpack[_T
self.outputs = outputs
if isinstance(self.outputs, torch.Tensor):
self.outputs = (self.outputs,)
+
+ # Auto-register to manager if provided
+ if self.ckpt_manager is not None:
+ self.ckpt_manager.add_checkpoint(self)
+
return outputs
def _recompute(self, _):
@@ -738,7 +867,7 @@ def _recompute(self, _):
from megatron.core.transformer.cuda_graphs import is_graph_capturing, is_graph_warmup
# The recomputation has been triggered already. Just return.
- # Handle cudagraphs, do nothing if currently in graph warmup
+ # Handle cudagraphs: do nothing if currently in graph warmup
if self.ctx is None or is_graph_warmup():
return
@@ -760,17 +889,8 @@ def _recompute(self, _):
recompute_ctx = contextlib.nullcontext()
fp8_ctx = contextlib.nullcontext()
- # Store the inputs for backward pass
- inputs = self.ctx.saved_tensors
-
- def detach(t):
- if isinstance(t, torch.Tensor):
- requires_grad = t.requires_grad
- t = t.detach()
- t.requires_grad_(requires_grad)
- return t
-
- inputs = tuple(detach(t) for t in inputs)
+ # Reconstruct full args list from saved ctx
+ inputs = _load_args_from_ctx(self.ctx)
with torch.enable_grad(), fp8_ctx, recompute_ctx:
outputs = self.run_function(*inputs)
@@ -803,10 +923,11 @@ def discard_output_and_register_recompute(self, hook_tensor):
in the forward pass and the gradient of the hook_tensor is computed before the recomputed
tensors are used.
"""
-
+ # When ckpt_manager is set, this is a no-op.
+ # Manager handles all discarding and hook registration uniformly.
from megatron.core.transformer.cuda_graphs import is_graph_warmup
- if is_graph_warmup():
+ if self.ckpt_manager is not None or is_graph_warmup():
return
# use resize to release the output tensor memory and still keep the metadata in the tensors.
diff --git a/megatron/core/transformer/__init__.py b/megatron/core/transformer/__init__.py
index 0e3cdcfa57e..75e3b485c4f 100644
--- a/megatron/core/transformer/__init__.py
+++ b/megatron/core/transformer/__init__.py
@@ -1,6 +1,10 @@
-# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
from .module import MegatronModule
from .spec_utils import ModuleSpec, build_module
from .transformer_config import MLATransformerConfig, TransformerConfig
-from .transformer_layer import TransformerLayer, TransformerLayerSubmodules
+from .transformer_layer import (
+ HyperConnectionTransformerLayer,
+ TransformerLayer,
+ TransformerLayerSubmodules,
+)
diff --git a/megatron/core/transformer/attention.py b/megatron/core/transformer/attention.py
index ea028f9ac47..f28cc258921 100644
--- a/megatron/core/transformer/attention.py
+++ b/megatron/core/transformer/attention.py
@@ -307,6 +307,7 @@ def __init__(
self.key_hidden_size = self.hidden_size_per_attention_head
self.val_hidden_size = self.hidden_size_per_attention_head
+ # TODO: This is built twice when using MLA, should be refactored.
if self.config.num_query_groups < world_size:
# TE throws an assertion error if num_kv_heads / num_query_groups
# is not divisible by TP size.
@@ -921,6 +922,13 @@ def forward(
(Tuple[Tensor, Tensor]) Attention output and bias.
"""
+
+ # here we need to set the right cp group for dynamic-cp
+ _orig_cp_group = self.pg_collection.cp
+ if packed_seq_params is not None and packed_seq_params.local_cp_size is not None:
+ assert packed_seq_params.cp_group is not None, "cp_group must be set in dynamic-cp mode"
+ self.pg_collection.cp = packed_seq_params.cp_group
+
# Check if we need to skip RoPE
# no_rope is 0-indexed array and self.layer_number is 1-indexed
no_rope = (
@@ -979,25 +987,26 @@ def forward(
self.k_layernorm is None or isinstance(self.k_layernorm, IdentityOp),
]
)
+ output_gate = self.config.attention_output_gate
# Check if fused_single_qkv_rope is requested but either unavailable or not
# supported for the current use case.
if self.attention_type != "cross":
assert not (
self.config.fused_single_qkv_rope and split_qkv
), "fused_single_qkv_rope requested but not available/supported for the config."
+ if output_gate:
+ assert split_qkv, "output_gate is not supported for unsplit mixed_qkv tensor."
- with off_interface(self.offload_qkv_linear, hidden_states, "qkv_linear") as hidden_states:
+ qkv_linear_manager = off_interface(self.offload_qkv_linear, hidden_states, "qkv_linear")
+ with qkv_linear_manager as hidden_states:
qkv_output = self.get_query_key_value_tensors(
hidden_states,
key_value_states,
split_qkv=split_qkv,
output_gate=self.config.attention_output_gate,
)
- if self.offload_qkv_linear:
- # `qkv_output` may be a tuple; commit supports tuple/list and will keep structure.
- qkv_output = off_interface.group_commit(
- qkv_output, name="qkv_linear", forced_released_tensors=[]
- )
+ # `qkv_output` may be a tuple; commit supports tuple/list and will keep structure.
+ qkv_output = qkv_linear_manager.group_offload(qkv_output, forced_released_tensors=[])
attn_mask_type = self.attn_mask_type
block_table = None
gate = None
@@ -1140,6 +1149,9 @@ def forward(
# ==================================
nvtx_range_push(suffix="core_attention")
+ core_attn_manager = off_interface(
+ self.offload_core_attention and self.training, query, "core_attn"
+ )
if self.checkpoint_core_attention and self.training:
core_attn_out = self._checkpointed_attention_forward(
query,
@@ -1153,9 +1165,7 @@ def forward(
else:
if inference_context is None or inference_context.is_static_batching():
# Static batching attention kernel.
- with off_interface(
- self.offload_core_attention and self.training, query, "core_attn"
- ) as query:
+ with core_attn_manager as query:
core_attn_out = apply_module(self.core_attention)(
query,
key,
@@ -1191,10 +1201,10 @@ def forward(
if is_using_quantization_scales(self.config):
core_attn_out[inference_context.padding_slice] = 0.0
- if self.offload_core_attention and self.training:
- core_attn_out = off_interface.group_commit(
- core_attn_out, name="core_attn", forced_released_tensors=[query, key, value]
- )
+ core_attn_out = core_attn_manager.group_offload(
+ core_attn_out, forced_released_tensors=[query, key, value]
+ )
+
if packed_seq_params is not None and packed_seq_params.qkv_format == 'thd':
# reshape to same output shape as unpacked case
# (t, np, hn) -> (t, b=1, h=np*hn)
@@ -1213,14 +1223,13 @@ def forward(
# Output. [sq, b, h]
# =================
nvtx_range_push(suffix="linear_proj")
- with off_interface(self.offload_attn_proj, core_attn_out, "attn_proj") as core_attn_out:
+ attn_proj_manager = off_interface(self.offload_attn_proj, core_attn_out, "attn_proj")
+ with attn_proj_manager as core_attn_out:
output, bias = self.linear_proj(core_attn_out)
- if self.offload_attn_proj:
- output = off_interface.group_commit(
- output, name="attn_proj", forced_released_tensors=[core_attn_out]
- )
+ output = attn_proj_manager.group_offload(output, forced_released_tensors=[core_attn_out])
nvtx_range_pop(suffix="linear_proj")
+ self.pg_collection.cp = _orig_cp_group
return output, bias
@jit_fuser
@@ -1689,6 +1698,8 @@ def get_query_key_value_tensors(
Derives `query` tensor from `hidden_states`, and `key`/`value` tensors
from `key_value_states`.
"""
+        # output_gate is already rejected by the existing assert below; no duplicate needed.
+
assert split_qkv, "split_qkv must be True for CrossAttention"
assert not output_gate, "Output gate is not supported in cross attention for now."
diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py
index d1850ff9bd5..c40b0568ed8 100644
--- a/megatron/core/transformer/cuda_graphs.py
+++ b/megatron/core/transformer/cuda_graphs.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
import dataclasses
import gc
@@ -821,13 +821,12 @@ def create_fwd_graph(self, args, kwargs, outputs=None, clone_inputs=True):
is_moe = isinstance(self.base_module, MoETransformerLayer)
if is_moe:
- from megatron.core.transformer.moe.moe_utils import get_moe_layer_wise_logging_tracker
+ from megatron.core.transformer.moe.moe_logging import get_moe_metrics_tracker
- tracker = get_moe_layer_wise_logging_tracker()
+ moe_metrics_tracker = get_moe_metrics_tracker()
cached_aux_losses = {}
- for name in tracker:
- if "values" in tracker[name]:
- cached_aux_losses[name] = torch.clone(tracker[name]["values"])
+ for name, entry in moe_metrics_tracker.metrics.items():
+ cached_aux_losses[name] = entry.values.clone()
self.fwd_graph = torch.cuda.CUDAGraph()
@@ -1017,8 +1016,11 @@ def clone_ten(ten):
param.main_grad.copy_(main_grad_copy)
if is_moe:
- for name in tracker:
- tracker[name]["values"].copy_(cached_aux_losses[name])
+ for name, cached_values in cached_aux_losses.items():
+ assert (
+ name in moe_metrics_tracker.metrics
+ ), "cached metrics must be found in the tracker."
+ moe_metrics_tracker.metrics[name].values.copy_(cached_values)
def create_bwd_graph(self):
"""Create a bwd cudagraph for this runner. Should be called inside
@@ -2220,6 +2222,15 @@ def _get_fp8_enabled():
)
else:
kwargs['fp8_enabled'] = False
+
+ from megatron.core.pipeline_parallel.fine_grained_activation_offload import (
+ FineGrainedActivationOffloadingInterface as off_interface,
+ )
+
+ # Disable and enable offloading before and after the warmup stage of cuda graph.
+ if self.config.fine_grained_activation_offloading:
+ kwargs['pre_warmup_hook'] = off_interface.disable_offload
+ kwargs['post_warmup_hook'] = off_interface.enable_offload
return kwargs
kwargs = get_make_graphed_callables_kwargs()
@@ -2267,8 +2278,27 @@ def _finish_capturing(self, start_time):
)
_set_capture_end()
+ from megatron.core.distributed.finalize_model_grads import reset_model_temporary_tensors
+ from megatron.core.pipeline_parallel.fine_grained_activation_offload import (
+ FineGrainedActivationOffloadingInterface as off_interface,
+ )
+
+ if self.config.fine_grained_activation_offloading:
+ off_interface.reset()
+
+ torch.distributed.barrier()
+ for model_chunk in self.model:
+ model_chunk.zero_grad_buffer()
+ for optimizer in self.optimizers:
+ optimizer.zero_grad()
+ from megatron.core.transformer.moe.moe_logging import get_moe_metrics_tracker
+
+ get_moe_metrics_tracker().clear()
+ reset_model_temporary_tensors(self.config, self.model)
+
torch.cuda.synchronize()
self._reset_after_capture()
+
if FREEZE_GC:
gc.unfreeze()
gc.collect()
diff --git a/megatron/core/transformer/dot_product_attention.py b/megatron/core/transformer/dot_product_attention.py
index 69039e0bfd0..26622839c14 100644
--- a/megatron/core/transformer/dot_product_attention.py
+++ b/megatron/core/transformer/dot_product_attention.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
import math
@@ -12,6 +12,9 @@
from megatron.core.fusions.fused_softmax import FusedScaleMaskSoftmax
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.process_groups_config import ProcessGroupCollection
+from megatron.core.transformer.dot_product_attention_context_parallel import (
+ AttentionFuncionWithContextParallel,
+)
from megatron.core.transformer.enums import AttnMaskType
from megatron.core.transformer.module import MegatronModule
from megatron.core.transformer.transformer_config import TransformerConfig
@@ -54,9 +57,12 @@ def __init__(
self.config: TransformerConfig = config
- assert (
- self.config.context_parallel_size == 1
- ), "Context parallelism is only supported by TEDotProductAttention!"
+ if self.config.context_parallel_size > 1:
+ assert attention_dropout is None and self.config.attention_dropout == 0.0, (
+ f'DotProductAttention with context parallelism does not support attention dropout,'
+ f' but got {self.config.context_parallel_size=},'
+ f' {attention_dropout=}, and {self.config.attention_dropout=}.'
+ )
self.layer_number = max(1, layer_number)
self.attn_mask_type = attn_mask_type
@@ -174,6 +180,19 @@ def forward(
self.num_attention_heads_per_partition // self.num_query_groups_per_partition, dim=2
)
+ if self.config.context_parallel_size > 1:
+ output = AttentionFuncionWithContextParallel.apply(
+ query,
+ key,
+ value,
+ attention_mask,
+ self.config.attention_dropout,
+ self.softmax_scale,
+ parallel_state.get_context_parallel_group(),
+ )
+ output = output.view(query.shape[0], query.shape[1], self.hidden_size_per_partition)
+ return output
+
# [b, np, sq, sk]
output_size = (query.size(1), query.size(2), query.size(0), key.size(0))
diff --git a/megatron/core/transformer/dot_product_attention_context_parallel.py b/megatron/core/transformer/dot_product_attention_context_parallel.py
new file mode 100644
index 00000000000..aaf08d40ade
--- /dev/null
+++ b/megatron/core/transformer/dot_product_attention_context_parallel.py
@@ -0,0 +1,345 @@
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+# Some of this code was adapted from https://github.com/zhuzilin/ring-flash-attention/
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from torch.nn import functional as F
+
+try:
+ import einops
+
+ HAVE_EINOPS = True
+except ImportError:
+ HAVE_EINOPS = False
+
+
+@torch.no_grad
+def eager_attn_fwd(q, k, v, attn_bias, sinks, scale, dropout):
+ """Forward pass for eager attention"""
+
+ # Rearrange query, key, value to (b, h, s, d)
+ b, sq, h, d = q.shape
+ sk = k.shape[1]
+ _q = einops.rearrange(q, 'b s h d -> b h s d')
+ _k = einops.rearrange(k, 'b s h d -> b h d s')
+ _v = einops.rearrange(v, 'b s h d -> b h s d')
+
+ # Compute attention weights
+ attn_w = torch.matmul(_q, _k) * scale
+ attn_w = attn_w + attn_bias
+
+ # Add sinks to attention weights
+ if sinks is None:
+ logits = attn_w
+ else:
+ _sinks = sinks.reshape(1, h, 1, 1).expand(b, -1, sq, 1)
+ logits = torch.cat([attn_w, _sinks], dim=-1)
+
+ # Compute attention scores
+ probs = F.softmax(logits, dim=-1, dtype=logits.dtype)
+ if sinks is None:
+ attn_w = probs
+ else:
+ attn_w = probs[..., :-1] # Drop the sink
+
+ # Compute attention output
+ attn_output = torch.matmul(attn_w, _v)
+ attn_output = einops.rearrange(attn_output, 'b h s d -> b s h d')
+ attn_output = attn_output.contiguous()
+
+ return attn_output, probs
+
+
+@torch.no_grad
+def eager_attn_bwd(q, k, v, attn_bias, sinks, scale, dropout, attn_output, probs, grad_output):
+ """Backward pass for eager attention"""
+
+ # Rearrange query, key, value to (b, h, s, d)
+ b, sq, h, d = q.shape
+ sk = k.shape[1]
+ _q_T = einops.rearrange(q, 'b s h d -> b h d s')
+ _k_T = einops.rearrange(k, 'b s h d -> b h s d')
+    _v_T = einops.rearrange(v, 'b s h d -> b h d s')
+
+ # Backward pass for score @ value
+ if sinks is None:
+ attn_w = probs
+ else:
+ attn_w = probs[..., :-1] # Drop the sink
+ grad_output = einops.rearrange(grad_output, 'b s h d -> b h s d')
+    attn_w_T = einops.rearrange(attn_w, 'b h sq sk -> b h sk sq')
+ grad__v = torch.matmul(attn_w_T, grad_output)
+ grad_attn_w = torch.matmul(grad_output, _v_T)
+
+ # Backward pass for softmax
+ if sinks is None:
+ grad_probs = grad_attn_w
+ else:
+ dummy = torch.zeros((b, h, sq, 1), device=q.device, dtype=q.dtype)
+ grad_probs = torch.cat([grad_attn_w, dummy], dim=3)
+ del grad_attn_w
+ grad_logits = torch._softmax_backward_data(
+ grad_probs, probs, -1, probs.dtype
+ ) # [b, h, sq, sk+1]
+
+ # Backward pass for adding sinks
+ if sinks is None:
+ grad_sinks = None
+ grad_attn_w = grad_logits
+ else:
+ grad__sinks = grad_logits[:, :, :, -1] # [b, h, sq]
+ grad_sinks = einops.rearrange(grad__sinks, 'b h s -> h (b s)').sum(-1)
+ grad_attn_w = grad_logits[:, :, :, :-1].contiguous() # [b, h, sq, sk]
+
+ # Backward pass for q @ K^T
+ grad_attn_w *= scale
+ grad__q = torch.matmul(grad_attn_w, _k_T)
+ grad__k = torch.matmul(_q_T, grad_attn_w)
+
+ # Rearrange grads to (b, s, h, d)
+ grad_v = einops.rearrange(grad__v, 'b h s d -> b s h d')
+ grad_k = einops.rearrange(grad__k, 'b h d s -> b s h d')
+ grad_q = einops.rearrange(grad__q, 'b h s d -> b s h d')
+ return grad_q, grad_k, grad_v, grad_sinks
+
+
+class AllGatherComm:
+ """All gather communication with async operations"""
+
+ def __init__(self, group=None) -> None:
+ self.group = group
+ self.handles = []
+
+ def all_gather(self, output_tensor: torch.Tensor, input_tensor: torch.Tensor):
+ '''All gather the input tensor to the output tensor'''
+
+ if self.group is None:
+ output_tensor.copy_(input_tensor)
+ else:
+ handle = torch.distributed.all_gather_into_tensor(
+ output_tensor, input_tensor, group=self.group, async_op=True
+ )
+ self.handles.append(handle)
+
+ def wait(self):
+ '''Wait for all gather operations to complete'''
+
+ if self.group is not None:
+ for handle in self.handles:
+ handle.wait()
+ self.handles = []
+
+
+def to_zz_mask_attn_bias(attention_mask, cp_size, nheads, nheads_k, heads_k_stride, device, dtype):
+ '''Convert the attention mask to the attention bias'''
+
+ if cp_size == 1:
+ zz_mask = attention_mask
+ else:
+ chunked = attention_mask.chunk(dim=3, chunks=cp_size * 2)
+ zz_mask = [_x for _p in zip(chunked[:cp_size], reversed(chunked[cp_size:])) for _x in _p]
+ zz_mask = torch.cat(zz_mask, dim=3)
+ attn_bias = torch.zeros(zz_mask.shape, device=device, dtype=dtype)
+ attn_bias.masked_fill_(zz_mask, float('-inf'))
+ attn_bias = attn_bias.expand(-1, heads_k_stride * (nheads // nheads_k), -1, -1)
+ return attn_bias
+
+
+class AttentionFuncionWithContextParallel(torch.autograd.Function):
+ """Native attention function with context parallelism."""
+
+ @staticmethod
+ def forward(ctx, q, k, v, attention_mask, attention_dropout, softmax_scale, pg):
+ '''Forward pass for the native attention function with context parallelism'''
+
+        # Ensure einops is available; raise a clear error early if not.
+ if not HAVE_EINOPS:
+ raise ImportError("einops is required by the attention CP but cannot be imported.")
+
+ # Initialize communication group and constants
+ cp_size = 1
+ if pg is not None:
+ cp_size = torch.distributed.get_world_size(pg)
+ comm = AllGatherComm(group=pg)
+ nheads = q.shape[2]
+ nheads_k = k.shape[2]
+ heads_k_stride = 1
+ assert nheads % nheads_k == 0 and nheads_k % heads_k_stride == 0
+ outs = []
+ probs = []
+
+ # Initialize KV buffers
+ kv_buffer = torch.empty(
+ (2, k.shape[0] * cp_size, k.shape[1], heads_k_stride, k.shape[3]),
+ dtype=k.dtype,
+ device=k.device,
+ )
+ kv_buffer_copy = torch.empty_like(kv_buffer)
+
+ # All-gather first chunk of KV buffers
+ k_0 = k[:, :, :heads_k_stride].contiguous()
+ v_0 = v[:, :, :heads_k_stride].contiguous()
+ comm.all_gather(kv_buffer_copy[0], k_0)
+ comm.all_gather(kv_buffer_copy[1], v_0)
+
+ # Prepare attention bias
+ assert (
+ attention_mask is not None
+ ), "Attention mask is required for the native attention function with context parallelism"
+ attn_bias = to_zz_mask_attn_bias(
+ attention_mask, cp_size, nheads, nheads_k, heads_k_stride, q.device, q.dtype
+ )
+
+ # Iterate over heads
+ for i in range(0, nheads_k, heads_k_stride):
+ # Wait for previous all-gather to complete
+ comm.wait()
+ kv_buffer, kv_buffer_copy = kv_buffer_copy, kv_buffer
+ # All-gather the next portion of KV buffers if not the last iteration
+ if i < nheads_k - heads_k_stride:
+ kvsl = i + heads_k_stride
+ kvsr = kvsl + heads_k_stride
+ send_k = k[:, :, kvsl:kvsr].contiguous()
+ send_v = v[:, :, kvsl:kvsr].contiguous()
+ comm.all_gather(kv_buffer_copy[0], send_k)
+ comm.all_gather(kv_buffer_copy[1], send_v)
+
+ # Prepare query, key, value for attention
+ q_i = q[:, :, i * nheads // nheads_k : (i + heads_k_stride) * nheads // nheads_k]
+ k_i = kv_buffer[0]
+ v_i = kv_buffer[1]
+
+ # Rearrange query, key, value to (b, s, h, d)
+ q_i = einops.rearrange(q_i, 's b h d -> b s h d')
+ k_i = einops.rearrange(k_i, 's b h d -> b s h d')
+ v_i = einops.rearrange(v_i, 's b h d -> b s h d')
+
+ # Forward pass
+ out_i, probs_i = eager_attn_fwd(
+ q_i, k_i, v_i, attn_bias, None, softmax_scale, attention_dropout
+ )
+ outs.append(out_i)
+ probs.append(probs_i)
+
+ # Concatenate outputs and rearrange to (s, b, h, d)
+ out = torch.cat(outs, dim=2)
+ out = einops.rearrange(out, 'b s h d -> s b h d')
+
+ # Save contexts for backward pass
+ ctx.save_for_backward(q, k, v, attention_mask, *outs, *probs)
+ ctx.dropout = attention_dropout
+ ctx.scale = softmax_scale
+ ctx.heads_k_stride = heads_k_stride # TODO make it configurable
+ ctx.pg = pg
+
+ return out
+
+ @staticmethod
+ def backward(ctx, dout):
+ '''Backward pass for the native attention function with context parallelism'''
+
+ # Initialize or resume constants and communication group
+ q, k, v, attention_mask, *rest = ctx.saved_tensors
+ nheads = q.shape[2]
+ nheads_k = k.shape[2]
+ heads_k_stride = ctx.heads_k_stride
+ assert nheads_k % heads_k_stride == 0
+ outs = rest[: nheads_k // heads_k_stride]
+ probs = rest[nheads_k // heads_k_stride :]
+ pg = ctx.pg
+ cp_size = 1
+ if pg is not None:
+ cp_size = torch.distributed.get_world_size(pg)
+ comm = AllGatherComm(group=pg)
+
+ # Initialize KV buffers
+ kv_buffer = torch.empty(
+ (2, k.shape[0] * cp_size, k.shape[1], heads_k_stride, k.shape[3]),
+ dtype=k.dtype,
+ device=k.device,
+ )
+ kv_buffer_copy = torch.empty_like(kv_buffer)
+
+ # All-gather first chunk of KV buffers
+ dq = []
+ dk = []
+ dv = []
+ k_0 = k[:, :, :heads_k_stride].contiguous()
+ v_0 = v[:, :, :heads_k_stride].contiguous()
+ comm.all_gather(kv_buffer_copy[0], k_0)
+ comm.all_gather(kv_buffer_copy[1], v_0)
+
+ # Prepare attention bias
+ attn_bias = to_zz_mask_attn_bias(
+ attention_mask, cp_size, nheads, nheads_k, heads_k_stride, q.device, q.dtype
+ )
+
+ # Iterate over heads
+ for i in range(0, nheads_k, heads_k_stride):
+ # Slice query and output for this iteration
+ q_slice = slice(i * nheads // nheads_k, (i + heads_k_stride) * nheads // nheads_k)
+ q_i = q[:, :, q_slice]
+ dout_i = dout[:, :, q_slice]
+
+ # Wait for previous all-gather to complete
+ comm.wait()
+ kv_buffer, kv_buffer_copy = kv_buffer_copy, kv_buffer
+
+ # All-gather the next portion of KV buffers if not the last iteration
+ if i < nheads_k - heads_k_stride:
+ kvsl = i + heads_k_stride
+ kvsr = kvsl + heads_k_stride
+ send_k = k[:, :, kvsl:kvsr].contiguous()
+ send_v = v[:, :, kvsl:kvsr].contiguous()
+ comm.all_gather(kv_buffer_copy[0], send_k)
+ comm.all_gather(kv_buffer_copy[1], send_v)
+
+ # Prepare key, value for attention
+ k_i = kv_buffer[0]
+ v_i = kv_buffer[1]
+
+ # Rearrange query, key, value to (b, s, h, d)
+ q_i = einops.rearrange(q_i, 's b h d -> b s h d')
+ k_i = einops.rearrange(k_i, 's b h d -> b s h d')
+ v_i = einops.rearrange(v_i, 's b h d -> b s h d')
+ dout_i = einops.rearrange(dout_i, 's b h d -> b s h d')
+
+ # Backward pass
+ dq_i, _dk_i, _dv_i, _ = eager_attn_bwd(
+ q_i, k_i, v_i, attn_bias, None, ctx.scale, ctx.dropout, outs[i], probs[i], dout_i
+ )
+
+ # Rearrange gradients to (s, b, h, d)
+ dq_i = einops.rearrange(dq_i, 'b s h d -> s b h d')
+ _dk_i = einops.rearrange(_dk_i, 'b s h d -> s b h d')
+ _dv_i = einops.rearrange(_dv_i, 'b s h d -> s b h d')
+ if pg is None:
+ dk_i = _dk_i
+ dv_i = _dv_i
+ else:
+ # Reduce-scatter gradients if CP > 1
+ dk_i = torch.zeros(
+ (k_i.shape[1] // cp_size, k_i.shape[0], k_i.shape[2], k_i.shape[3]),
+ device=k_i.device,
+ dtype=k_i.dtype,
+ )
+ dv_i = torch.zeros(
+ (v_i.shape[1] // cp_size, v_i.shape[0], v_i.shape[2], v_i.shape[3]),
+ device=v_i.device,
+ dtype=v_i.dtype,
+ )
+ torch.distributed.reduce_scatter_tensor(dk_i, _dk_i, group=pg)
+ torch.distributed.reduce_scatter_tensor(dv_i, _dv_i, group=pg)
+
+ # Collect gradients
+ dq.append(dq_i)
+ dk.append(dk_i)
+ dv.append(dv_i)
+
+ # Concatenate gradients and return
+ dq = torch.cat(dq, dim=2)
+ dk = torch.cat(dk, dim=2)
+ dv = torch.cat(dv, dim=2)
+ return dq, dk, dv, None, None, None, None
diff --git a/megatron/core/transformer/experimental_attention_variant/absorbed_mla.py b/megatron/core/transformer/experimental_attention_variant/absorbed_mla.py
index 860118b17a3..6c6d5b07a75 100644
--- a/megatron/core/transformer/experimental_attention_variant/absorbed_mla.py
+++ b/megatron/core/transformer/experimental_attention_variant/absorbed_mla.py
@@ -18,6 +18,7 @@
import torch
+from megatron.core import tensor_parallel
from megatron.core.extensions.transformer_engine import HAVE_TE
from megatron.core.models.common.embeddings import (
RotaryEmbedding,
@@ -112,6 +113,9 @@ def __init__(
)
assert not config.add_bias_linear, "add_bias_linear is not supported for AbsorbedMLA"
+ assert not (
+ config.tensor_model_parallel_size > 1 and not config.sequence_parallel
+ ), "AbsorbedMLA requires sequence_parallel when tensor_model_parallel_size > 1"
self.query_projection_size = self.config.v_head_dim * self.config.num_attention_heads
self.q_head_dim = self.config.qk_head_dim + self.config.qk_pos_emb_head_dim
@@ -594,6 +598,7 @@ def qkv_up_proj_and_rope_apply(q_compressed, kv_compressed, k_pos_emb, rotary_po
cu_seqlens=cu_seqlens_q,
mscale=mscale,
cp_group=self.pg_collection.cp,
+ mla_rotary_interleaved=True,
)
# k_pos_emb:[num_tokens, 1, qk_pos_emb_head_dim]
k_pos_emb = apply_rotary_pos_emb(
@@ -603,6 +608,7 @@ def qkv_up_proj_and_rope_apply(q_compressed, kv_compressed, k_pos_emb, rotary_po
cu_seqlens=cu_seqlens_kv,
mscale=mscale,
cp_group=self.pg_collection.cp,
+ mla_rotary_interleaved=True,
)
# query: [num_tokens, n, (kv_lora_rank + qk_pos_emb_head_dim)]
diff --git a/megatron/core/transformer/experimental_attention_variant/dsa.py b/megatron/core/transformer/experimental_attention_variant/dsa.py
index 3734db7043f..5c5f77363dc 100644
--- a/megatron/core/transformer/experimental_attention_variant/dsa.py
+++ b/megatron/core/transformer/experimental_attention_variant/dsa.py
@@ -778,10 +778,12 @@ def __init__(
def _apply_rope(self, x: torch.Tensor, rotary_pos_emb: torch.Tensor, mscale: float):
"""Apply RoPE to the input tensor."""
- # x_nope [seqlen, batch, *, index_head_dim - qk_pos_emb_head_dim]
# x_pe [seqlen, batch, *, qk_pos_emb_head_dim]
- x_nope, x_pe = torch.split(
- x, [self.index_head_dim - self.qk_pos_emb_head_dim, self.qk_pos_emb_head_dim], dim=-1
+ # x_nope [seqlen, batch, *, index_head_dim - qk_pos_emb_head_dim]
+ # To align with DeepSeek's implementation,
+ # x_pe is placed at the front, and x_nope is placed at the back.
+ x_pe, x_nope = torch.split(
+ x, [self.qk_pos_emb_head_dim, self.index_head_dim - self.qk_pos_emb_head_dim], dim=-1
)
x_pe = apply_rotary_pos_emb(
x_pe,
@@ -790,9 +792,12 @@ def _apply_rope(self, x: torch.Tensor, rotary_pos_emb: torch.Tensor, mscale: flo
cu_seqlens=None,
mscale=mscale,
cp_group=self.pg_collection.cp,
+ # This flag is for the MLA-style interleaving in RoPE.
+ # Set it to False, as indexer does not apply interleaved RoPE.
+ mla_rotary_interleaved=False,
)
# [seqlen, batch, *, index_head_dim]
- x = torch.cat([x_nope, x_pe], dim=-1)
+ x = torch.cat([x_pe, x_nope], dim=-1)
return x
def forward_before_topk(
diff --git a/megatron/core/transformer/fsdp_dtensor_checkpoint.py b/megatron/core/transformer/fsdp_dtensor_checkpoint.py
index e408209c778..4dbc6623506 100644
--- a/megatron/core/transformer/fsdp_dtensor_checkpoint.py
+++ b/megatron/core/transformer/fsdp_dtensor_checkpoint.py
@@ -66,6 +66,19 @@ def get_ep_layer_offset(num_experts: int | None = None) -> int:
return local_expert_offset
+def get_total_num_experts(num_experts: int | None = None) -> int:
+ """
+ Get the total number of experts for the current model.
+
+ Args:
+ num_experts: Total number of experts in the model. If None, returns 0.
+
+ Returns:
+ The total number of experts.
+ """
+ return num_experts if num_experts else 0
+
+
def get_expert_index_from_key(key):
"""Extract expert index from various expert key formats.
@@ -102,7 +115,7 @@ def handle_experts_in_state_dict(state_dict, num_experts: int | None = None):
The processed state dictionary with rewritten expert keys.
"""
local_expert_start = get_ep_layer_offset(num_experts)
- local_expert_end = num_experts if num_experts else 0
+ local_expert_end = get_total_num_experts(num_experts)
def should_keep_expert_key(expert_index):
"""Determine if this rank should keep this expert key based on expert index"""
diff --git a/megatron/core/transformer/hyper_connection.py b/megatron/core/transformer/hyper_connection.py
new file mode 100644
index 00000000000..64ec3107213
--- /dev/null
+++ b/megatron/core/transformer/hyper_connection.py
@@ -0,0 +1,716 @@
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+import math
+from typing import TYPE_CHECKING, Optional, Tuple
+
+import torch
+import torch.nn as nn
+from torch import Tensor
+
+from megatron.core.transformer.module import MegatronModule
+from megatron.core.transformer.transformer_config import TransformerConfig
+from megatron.core.utils import nvtx_decorator
+
+if TYPE_CHECKING:
+ from megatron.core.tensor_parallel.random import CheckpointManager
+
+
+@torch.compile
+def _sinkhorn_iterations(input_logits: Tensor, num_iterations: int, eps: float) -> Tensor:
+ row_max = input_logits.max(dim=-1, keepdim=True).values
+ M = torch.exp(input_logits - row_max)
+ for _ in range(num_iterations):
+ M = M / M.sum(dim=-1, keepdim=True).clamp(min=eps)
+ M = M / M.sum(dim=-2, keepdim=True).clamp(min=eps)
+ return M
+
+
+class SinkhornKnopp(torch.autograd.Function):
+ """Sinkhorn-Knopp projection to doubly stochastic matrix.
+
+ This is an autograd.Function because the iterative forward is re-executed
+ during backward (under torch.enable_grad) so that PyTorch's autograd can
+ differentiate through it without storing all intermediate iteration states.
+ """
+
+ @staticmethod
+ def forward(ctx, input_logits: Tensor, num_iterations: int, eps: float = 1e-6) -> Tensor:
+ """Run Sinkhorn iterations and save inputs for backward recomputation."""
+ M = _sinkhorn_iterations(input_logits, num_iterations, eps)
+ ctx.save_for_backward(input_logits)
+ ctx.num_iterations = num_iterations
+ ctx.eps = eps
+ return M
+
+ @staticmethod
+ def backward(ctx, grad_output: Tensor):
+ """Recompute forward under enable_grad and back-propagate."""
+ (input_logits,) = ctx.saved_tensors
+ with torch.enable_grad():
+ logits = input_logits.detach().requires_grad_(True)
+ M = _sinkhorn_iterations(logits, ctx.num_iterations, ctx.eps)
+ M.backward(grad_output)
+ return logits.grad, None, None
+
+
+def native_sinkhorn(input_logits: Tensor, num_iterations: int, eps: float = 1e-6) -> Tensor:
+ """Native Sinkhorn-Knopp (autograd.Function wrapper)."""
+ return SinkhornKnopp.apply(input_logits, num_iterations, eps)
+
+
+@torch.compile
+def native_h_aggregate(x: Tensor, h_pre: Tensor) -> Tensor:
+ """Native n-stream weighted aggregation: out = sum_j(h_pre_j * x_j)."""
+ return (x * h_pre.unsqueeze(-1)).sum(dim=2)
+
+
+@torch.compile
+def native_h_post_bda(
+ h_res: Tensor, original_residual: Tensor, h_post: Tensor, x: Tensor, bias: Optional[Tensor]
+) -> Tensor:
+ """Native H_res @ residual + H_post * (x [+ bias])."""
+ s, b, n, C = original_residual.shape
+ h_res_batched = h_res.view(s * b, n, n)
+ residual_batched = original_residual.view(s * b, n, C)
+ mixed = torch.bmm(h_res_batched, residual_batched).view(s, b, n, C)
+ x_expanded = h_post.unsqueeze(-1) * x.unsqueeze(2)
+ if bias is not None:
+ bias_expanded = h_post.unsqueeze(-1) * bias.view(1, 1, 1, C)
+ return x_expanded + bias_expanded + mixed
+ return x_expanded + mixed
+
+
+@torch.compile
+def native_proj_rms(x: Tensor, weight: Tensor, eps: float = 1e-6) -> Tuple[Tensor, Tensor]:
+ """Native fused projection + RMS normalization."""
+ proj = torch.matmul(x, weight.t())
+ norm = x.norm(dim=-1, keepdim=True)
+ K = x.shape[-1]
+ v = norm / math.sqrt(K) + eps
+ r = 1.0 / v
+ return proj, r
+
+
+# ============================================================================
+# HyperConnectionModule
+# ============================================================================
+
+
+# TODO: keep hyper connection in fp32 computation
+class HyperConnectionModule(MegatronModule):
+ """
+ Unified mHC (Manifold-Constrained Hyper-Connections) module.
+
+ Implements the complete mHC propagation:
+ x_{l+1} = H_res @ x_l + H_post^T @ F(H_pre @ x_l)
+
+ This module handles:
+ 1. Computing learnable mappings: H_pre, H_post, H_res (with Sinkhorn-Knopp projection)
+ 2. Aggregation: n-stream → 1-stream (H_pre @ x)
+ 3. Expansion: 1-stream → n-stream (H_post^T @ output)
+ 4. Residual merge: H_res @ x + expanded_output
+ 5. Block-level expand/contract for TransformerBlock boundaries
+
+ Args:
+ config: TransformerConfig with hyper-connection fields
+ layer_number: Current layer index for initialization
+ """
+
+ def __init__(self, config: TransformerConfig, layer_number: int):
+ super().__init__(config)
+ self.config = config
+ self.layer_number = layer_number
+ self.n = config.num_residual_streams
+ self.hidden_size = config.hidden_size
+ self.sinkhorn_iterations = config.mhc_sinkhorn_iterations
+
+ # Projection weights for dynamic mappings
+ # Input: [s, b, n*C] -> Output: n^2 + 2n values per token
+ # - H_pre: n values
+ # - H_post: n values
+ # - H_res: n^2 values (before Sinkhorn projection)
+ self.mapping_proj = nn.Linear(
+ self.n * self.hidden_size, self.n * self.n + 2 * self.n, bias=False
+ )
+
+ init_alpha = config.mhc_init_gating_factor
+ # Learnable scaling factors (Eq. 5 in paper)
+ self.alpha_pre = nn.Parameter(torch.full((1,), init_alpha))
+ self.alpha_post = nn.Parameter(torch.full((1,), init_alpha))
+ self.alpha_res = nn.Parameter(torch.full((1,), init_alpha))
+
+ # Static bias terms
+ self.bias = nn.Parameter(torch.zeros(self.n * self.n + 2 * self.n))
+ self.norm_eps = 1e-6
+
+ # Choose implementation: fused cuTile kernels vs reference modules.
+ # Both paths expose the same call signatures so the rest of the code
+ # is implementation-agnostic.
+ if config.use_fused_mhc:
+ from megatron.core.fusions.fused_mhc_kernels import (
+ fused_h_aggregate,
+ fused_h_post_bda,
+ fused_proj_rms,
+ fused_sinkhorn,
+ )
+
+ self._sinkhorn_op = fused_sinkhorn
+ self._h_aggregate_op = fused_h_aggregate
+ self._h_post_bda_op = fused_h_post_bda
+ self._proj_rms_op = fused_proj_rms
+ else:
+ self._sinkhorn_op = native_sinkhorn
+ self._h_aggregate_op = native_h_aggregate
+ self._h_post_bda_op = native_h_post_bda
+ self._proj_rms_op = native_proj_rms
+
+ self._init_weights()
+
+ def _init_weights(self) -> None:
+ """Initialize weights for stable training."""
+ nn.init.xavier_uniform_(self.mapping_proj.weight)
+
+ # Set sequence_parallel attribute on parameters for gradient synchronization
+ # across TP ranks when sequence_parallel is enabled.
+ # This is required because HyperConnectionModule uses non-TP-aware layers
+ # (nn.Linear, nn.RMSNorm) whose gradients need to be all-reduced.
+ if self.config.sequence_parallel:
+ setattr(self.mapping_proj.weight, 'sequence_parallel', True)
+ setattr(self.alpha_pre, 'sequence_parallel', True)
+ setattr(self.alpha_post, 'sequence_parallel', True)
+ setattr(self.alpha_res, 'sequence_parallel', True)
+ setattr(self.bias, 'sequence_parallel', True)
+
+ def _projection_and_get_norm(self, x: Tensor) -> Tuple[Tensor, Tensor]:
+ """
+ Projection + RMS normalization.
+
+ Args:
+ x: [s, b, n*C] - n-stream hidden states
+ """
+ s, b, nC = x.shape
+ x_2d = x.reshape(s * b, nC)
+ proj, r = self._proj_rms_op(x_2d, self.mapping_proj.weight, self.norm_eps)
+ return proj.view(s, b, -1), r.view(s, b, 1)
+
+ @torch.compile
+ def _compute_h(self, proj: Tensor, r: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
+ """
+ Compute h from projected hidden states and scaling factors.
+
+ Args:
+ proj: [s, b, n^2 + 2n] - projected hidden states
+ r: [s, b, 1] - scaling factors
+
+ Returns:
+ h_pre: [s, b, n] - aggregation weights
+ h_post: [s, b, n] - expansion weights
+ h_res: [s, b, n^2] - residual mixing logits
+ """
+ alpha_ = torch.cat(
+ [
+ self.alpha_pre.expand(self.n),
+ self.alpha_post.expand(self.n),
+ self.alpha_res.expand(self.n * self.n),
+ ],
+ dim=-1,
+ )
+ h = r * proj * alpha_ + self.bias
+ # H_pre = σ(α_pre * (θ_pre @ x̂) + b_pre)
+ h_pre = h[..., : self.n].sigmoid() # [s, b, n]
+
+ # H_post = 2σ(α_post * (θ_post @ x̂) + b_post)
+ h_post = h[..., self.n : 2 * self.n].sigmoid() * 2 # [s, b, n]
+ h_res = h[..., 2 * self.n :]
+ return h_pre, h_post, h_res
+
+ @nvtx_decorator(message="HyperConnection::compute_mappings")
+ def compute_mappings(self, x: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
+ """
+ Compute mHC mappings from input hidden states.
+
+ Reference: Eq. (5) and (8) in mHC paper
+
+ Args:
+ x: [s, b, n*C] - n-stream hidden states
+
+ Returns:
+ h_pre: [s, b, n] - aggregation weights (sigmoid activated)
+ h_post: [s, b, n] - expansion weights (2*sigmoid activated)
+ h_res: [s, b, n, n] - residual mixing matrix (doubly stochastic)
+ """
+ s, b, _ = x.shape
+ with torch.cuda.nvtx.range("HyperConnection::projection_and_get_norm"):
+ proj, r = self._projection_and_get_norm(x)
+ with torch.cuda.nvtx.range("HyperConnection::compute_h"):
+ h_pre, h_post, h_res = self._compute_h(proj, r)
+ h_res = self._sinkhorn_op(
+ h_res.view(s, b, self.n, self.n), self.sinkhorn_iterations, self.norm_eps
+ ) # [s, b, n, n]
+
+ return h_pre, h_post, h_res
+
+ @torch.compile
+ def _apply_h_post(self, x: Tensor, h_post: Tensor) -> Tensor:
+ """
+ Core implementation of H_post application to a single tensor.
+
+ Computes: H_post^T @ x
+
+ Args:
+ x: Input tensor, can be either:
+ - [s, b, C] - standard hidden states
+ - [C] - bias tensor (will be broadcast)
+ h_post: [s, b, n] - expansion weights
+
+ Returns:
+ output: [s, b, n*C] - expanded tensor
+ """
+ n = self.n
+ s, b, _ = h_post.shape
+
+ if x.dim() == 1:
+ # x is bias with shape [C], need to broadcast to [s, b, 1, C]
+ C = x.shape[0]
+ x_expanded = x.unsqueeze(0).unsqueeze(0).unsqueeze(0).expand(s, b, 1, C)
+ else:
+ # x is [s, b, C]
+ C = x.shape[-1]
+ x_expanded = x.unsqueeze(2) # [s, b, 1, C]
+
+ # h_post^T @ x : [s, b, n, 1] * [s, b, 1, C] -> [s, b, n, C]
+ # Using broadcast multiply instead of einsum
+ result = h_post.unsqueeze(-1) * x_expanded
+ return result.view(s, b, n * C)
+
+ @nvtx_decorator(message="HyperConnection::apply_h_post")
+ def apply_h_post(
+ self,
+ x_with_bias: Tuple[Tensor, Optional[Tensor]],
+ h_post: Tensor,
+ manager: Optional['CheckpointManager'] = None,
+ ) -> Tuple[Tensor, Optional[Tensor]]:
+ """
+ Apply H_post to x and optionally bias, with optional checkpointing.
+
+ This is the unified entry point that handles both normal execution
+ and checkpoint-based execution for memory efficiency.
+
+ Args:
+ x_with_bias: Tuple of (x, bias) where:
+ - x: [s, b, C] - hidden states
+ - bias: [C] or None - optional bias tensor
+ h_post: [s, b, n] - expansion weights
+ manager: Optional CheckpointManager for checkpoint management.
+ When provided, wraps _apply_h_post with CheckpointWithoutOutput.
+
+ Returns:
+ Tuple of (x_out, bias_out) where:
+ - x_out: [s, b, n*C] - expanded hidden states
+ - bias_out: [s, b, n*C] or None - expanded bias if input bias was not None
+ """
+ x, bias = x_with_bias
+
+ if manager is not None:
+ from megatron.core.tensor_parallel.random import CheckpointWithoutOutput
+
+ # Checkpoint _apply_h_post to discard the output
+ x_out = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(
+ self._apply_h_post, x, h_post
+ )
+
+ # Checkpoint _apply_h_post for bias if not None
+ if bias is not None:
+ bias_out = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(
+ self._apply_h_post, bias, h_post
+ )
+ else:
+ bias_out = None
+ else:
+ # Normal execution without checkpoint
+ x_out = self._apply_h_post(x, h_post)
+ bias_out = self._apply_h_post(bias, h_post) if bias is not None else None
+
+ return x_out, bias_out
+
+ def aggregate(self, x: Tensor, h_pre: Tensor) -> Tensor:
+ """
+ Aggregate n-stream to 1-stream.
+
+ Args:
+ x: [s, b, n*C] - n-stream hidden states
+ h_pre: [s, b, n] - aggregation weights
+
+ Returns:
+ aggregated: [s, b, C] - single stream hidden states
+ """
+ s, b, _ = x.shape
+ C = self.hidden_size
+ x_streams = x.view(s, b, self.n, C)
+ return self._h_aggregate_op(x_streams, h_pre)
+
+ @torch.compile
+ def apply_h_res(self, h_res: Tensor, residual: Tensor) -> Tensor:
+ """
+ Apply H_res to residual using H_res weights.
+
+ Computes: H_res @ residual
+
+ Args:
+ h_res: [s, b, n, n] - residual mixing matrix
+ residual: [s, b, n*C] - n-stream hidden states
+ """
+ s, b, _ = residual.shape
+ n = self.n
+ C = self.hidden_size
+
+ # Reshape for bmm: [s, b, n, n] -> [s*b, n, n]
+ h_res_batched = h_res.view(s * b, n, n)
+ # [s, b, n*C] -> [s, b, n, C] -> [s*b, n, C]
+ residual_batched = residual.view(s, b, n, C).view(s * b, n, C)
+
+ # Batch matrix multiply: [s*b, n, n] @ [s*b, n, C] -> [s*b, n, C]
+ mixed = torch.bmm(h_res_batched, residual_batched)
+
+ return mixed.view(s, b, n * C)
+
+ def forward(
+ self, hidden_states: Tensor, mhc_recompute_manager: Optional['CheckpointManager'] = None
+ ) -> Tuple[Tensor, Tensor, Tensor]:
+ """
+ Full mHC forward pass.
+
+ Args:
+ hidden_states: [s, b, n*C] - n-stream hidden states
+ mhc_recompute_manager: Optional CheckpointManager for checkpoint management.
+ When provided, uses _forward_with_checkpoint for memory-efficient execution.
+
+ Returns:
+ aggregated: [s, b, C] - aggregated input for layer computation
+ h_res: [s, b, n, n] - residual mixing matrix (for fused kernel)
+ h_post: [s, b, n] - expansion weights
+ """
+ if mhc_recompute_manager is not None:
+ return self._forward_with_checkpoint(hidden_states, mhc_recompute_manager)
+ else:
+ return self._forward_normal(hidden_states)
+
+ def _forward_normal(self, hidden_states: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
+ """
+ Normal forward pass without checkpointing.
+
+ Args:
+ hidden_states: [s, b, n*C] - n-stream hidden states
+
+ Returns:
+ aggregated: [s, b, C] - aggregated input for layer computation
+ h_res: [s, b, n, n] - residual mixing matrix (for fused kernel)
+ h_post: [s, b, n] - expansion weights
+ """
+ # Compute mappings
+ h_pre, h_post, h_res = self.compute_mappings(hidden_states)
+
+ # Aggregate for layer input
+ with torch.cuda.nvtx.range("HyperConnection::aggregate"):
+ aggregated = self.aggregate(hidden_states, h_pre)
+
+ return aggregated, h_res, h_post
+
+ def _forward_with_checkpoint(
+ self, hidden_states: Tensor, manager: 'CheckpointManager'
+ ) -> Tuple[Tensor, Tensor, Tensor]:
+ """
+ Forward pass with checkpointing for memory efficiency.
+
+ compute_mappings is called directly (not checkpointed) since its outputs
+ (h_pre, h_post, h_res) are needed downstream. Only aggregate is wrapped with
+ CheckpointWithoutOutput and auto-registered to the manager.
+ apply_h_res is deferred to fused_h_res_h_post_bda for kernel fusion.
+
+ Args:
+ hidden_states: [s, b, n*C] - n-stream hidden states
+ manager: CheckpointManager for unified recomputation
+
+ Returns:
+ aggregated: [s, b, C] - aggregated input for layer computation
+ h_res: [s, b, n, n] - residual mixing matrix (for fused kernel)
+ h_post: [s, b, n] - expansion weights
+ """
+ from megatron.core.tensor_parallel.random import CheckpointWithoutOutput
+
+ h_pre, h_post, h_res = self.compute_mappings(hidden_states)
+
+ # Checkpoint aggregate - auto-registers to manager
+ aggregated = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(
+ self.aggregate, hidden_states, h_pre
+ )
+
+ return aggregated, h_res, h_post
+
+ # ==================== Block-level utilities ====================
+
+ @staticmethod
+ def input_expand(x: Tensor, n: int) -> Tensor:
+ """
+ Expand 1-stream to n-stream at TransformerBlock entry.
+
+ Simple replication strategy: each stream initialized as a copy of input.
+
+ Args:
+ x: [s, b, C] - single stream hidden states
+ n: Number of residual streams
+
+ Returns:
+ expanded: [s, b, n*C] - n-stream hidden states
+ """
+ s, b, C = x.shape
+ # Replicate input to n streams
+ expanded = x.unsqueeze(2).expand(s, b, n, C).contiguous()
+ return expanded.view(s, b, n * C)
+
+ @staticmethod
+ def output_contract(x: Tensor, n: int) -> Tensor:
+ """
+ Contract n-stream to 1-stream at TransformerBlock exit.
+
+ Simple averaging strategy: average all streams.
+
+ Args:
+ x: [s, b, n*C] - n-stream hidden states
+ n: Number of residual streams
+
+ Returns:
+ contracted: [s, b, C] - single stream hidden states
+ """
+ s, b, nC = x.shape
+ C = nC // n
+ # Average all streams
+ x_streams = x.view(s, b, n, C)
+ contracted = x_streams.mean(dim=2)
+ return contracted
+
+ # ==================== Fused kernel placeholder ====================
+
+ @nvtx_decorator(message="HyperConnection::fused_h_res_h_post_bda")
+ def fused_h_res_h_post_bda(
+ self,
+ h_res: Tensor,
+ original_residual: Tensor,
+ h_post: Tensor,
+ layer_output_with_bias: Tuple[Tensor, Optional[Tensor]],
+ dropout_prob: float,
+ training: bool,
+ fused: bool,
+ manager: Optional['CheckpointManager'] = None,
+ ) -> Tensor:
+ """
+ Fused kernel combining apply_h_res, apply_h_post and bias-dropout-add.
+
+ This is a placeholder for future kernel fusion optimization.
+ Currently implements the operations sequentially using native PyTorch.
+
+ The computation flow is:
+ 1. mixed = H_res @ original_residual (apply_h_res)
+ 2. expanded = H_post^T @ layer_output (apply_h_post)
+ 3. output = dropout(expanded + bias) + mixed (bias-dropout-add)
+
+ Args:
+ h_res: [s, b, n, n] - residual mixing matrix
+ original_residual: [s, b, n*C] - n-stream hidden states (before H_res applied)
+ h_post: [s, b, n] - expansion weights
+ layer_output_with_bias: Tuple of (x, bias) where:
+ - x: [s, b, C] - layer output (attention or MLP output)
+ - bias: [C] or None - optional bias tensor
+ dropout_prob: Dropout probability
+ training: Whether in training mode
+ fused: Whether to use fused BDA implementation
+ manager: Optional CheckpointManager for checkpoint management.
+ When provided, each operation is wrapped with CheckpointWithoutOutput.
+
+ Returns:
+ output: [s, b, n*C] - final output after all operations
+ """
+ if manager is not None:
+ return self._fused_h_res_h_post_bda_with_checkpoint(
+ h_res,
+ original_residual,
+ h_post,
+ layer_output_with_bias,
+ dropout_prob,
+ training,
+ fused,
+ manager,
+ )
+ else:
+ return self._fused_h_res_h_post_bda_native(
+ h_res,
+ original_residual,
+ h_post,
+ layer_output_with_bias,
+ dropout_prob,
+ training,
+ fused,
+ )
+
+ def _fused_h_res_h_post_bda_native(
+ self,
+ h_res: Tensor,
+ original_residual: Tensor,
+ h_post: Tensor,
+ layer_output_with_bias: Tuple[Tensor, Optional[Tensor]],
+ dropout_prob: float,
+ training: bool,
+ fused: bool,
+ ) -> Tensor:
+ """
+ h_res, h_post and bda.
+
+ When dropout is zero (or inference), uses a single fused/reference kernel
+ for H_res @ residual + H_post * (x + bias). Falls back to unfused
+ implementation when dropout is needed.
+
+ Args:
+ h_res: [s, b, n, n] - residual mixing matrix
+ original_residual: [s, b, n*C] - n-stream hidden states
+ h_post: [s, b, n] - expansion weights
+ layer_output_with_bias: Tuple of (x, bias)
+ dropout_prob: Dropout probability
+ training: Whether in training mode
+ fused: Whether to use fused BDA implementation
+
+ Returns:
+ output: [s, b, n*C] - final output
+ """
+ x, bias = layer_output_with_bias
+
+ if dropout_prob == 0.0 or not training:
+ s, b, _ = original_residual.shape
+ n = self.n
+ C = self.hidden_size
+ orig_reshaped = original_residual.view(s, b, n, C)
+ output = self._h_post_bda_op(h_res, orig_reshaped, h_post, x, bias)
+ return output.view(s, b, n * C)
+
+ from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add
+
+ with torch.cuda.nvtx.range("HyperConnection::apply_h_res"):
+ mixed = self.apply_h_res(h_res, original_residual)
+ with torch.cuda.nvtx.range("HyperConnection::apply_h_post"):
+ x_expanded = self._apply_h_post(x, h_post)
+ bias_expanded = self._apply_h_post(bias, h_post) if bias is not None else None
+ bda_func = get_bias_dropout_add(training, fused)
+ with torch.cuda.nvtx.range("HyperConnection::bda"):
+ output = bda_func((x_expanded, bias_expanded), mixed, dropout_prob)
+ return output
+
+ @nvtx_decorator(message="HyperConnection::fused_h_res_h_post_bda_with_checkpoint")
+ def _fused_h_res_h_post_bda_with_checkpoint(
+ self,
+ h_res: Tensor,
+ original_residual: Tensor,
+ h_post: Tensor,
+ layer_output_with_bias: Tuple[Tensor, Optional[Tensor]],
+ dropout_prob: float,
+ training: bool,
+ fused: bool,
+ manager: 'CheckpointManager',
+ ) -> Tensor:
+ """
+ Checkpointed variant of _fused_h_res_h_post_bda_native.
+
+ Wraps compute in CheckpointWithoutOutput for activation memory savings.
+ Cannot reuse _native directly because checkpoint requires all args to be
+ positional Tensors; tuple/Optional/scalar args are unpacked or captured
+ via closure instead.
+
+ Args:
+ h_res: [s, b, n, n] - residual mixing matrix
+ original_residual: [s, b, n*C] - n-stream hidden states
+ h_post: [s, b, n] - expansion weights
+ layer_output_with_bias: Tuple of (x, bias)
+ dropout_prob: Dropout probability
+ training: Whether in training mode
+ fused: Whether to use fused BDA implementation
+ manager: CheckpointManager for checkpoint management
+
+ Returns:
+ output: [s, b, n*C] - final output
+ """
+ from megatron.core.tensor_parallel.random import CheckpointWithoutOutput
+
+ x, bias = layer_output_with_bias
+ n = self.n
+ C = self.hidden_size
+
+ # Fast path: no dropout → use fused/reference h_post_bda kernel (same as _native)
+ if dropout_prob == 0.0 or not training:
+
+ def _fused_wrapper(h_res, original_residual, h_post, x, *optional_bias):
+ s, b, _ = original_residual.shape
+ orig_reshaped = original_residual.view(s, b, n, C)
+ b_arg = optional_bias[0] if optional_bias else None
+ return self._h_post_bda_op(h_res, orig_reshaped, h_post, x, b_arg).view(s, b, n * C)
+
+ ckpt = CheckpointWithoutOutput(ckpt_manager=manager)
+ if bias is not None:
+ output = ckpt.checkpoint(_fused_wrapper, h_res, original_residual, h_post, x, bias)
+ else:
+ output = ckpt.checkpoint(_fused_wrapper, h_res, original_residual, h_post, x)
+
+ # Slow path: dropout required → fused kernel does not support dropout,
+ # fall back to sequential apply_h_res + apply_h_post + bda
+ else:
+ from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add
+
+ bda_func = get_bias_dropout_add(training, fused)
+ has_bias = bias is not None
+
+ def _native_wrapper(h_res, original_residual, h_post, x, *optional_bias):
+ with torch.cuda.nvtx.range("HyperConnection::apply_h_res"):
+ mixed = self.apply_h_res(h_res, original_residual)
+ with torch.cuda.nvtx.range("HyperConnection::apply_h_post"):
+ x_expanded = self._apply_h_post(x, h_post)
+ if has_bias:
+ bias_expanded = self._apply_h_post(optional_bias[0], h_post)
+ else:
+ bias_expanded = None
+ with torch.cuda.nvtx.range("HyperConnection::bda"):
+ output = bda_func((x_expanded, bias_expanded), mixed, dropout_prob)
+ return output
+
+ ckpt = CheckpointWithoutOutput(ckpt_manager=manager)
+ if has_bias:
+ output = ckpt.checkpoint(_native_wrapper, h_res, original_residual, h_post, x, bias)
+ else:
+ output = ckpt.checkpoint(_native_wrapper, h_res, original_residual, h_post, x)
+
+ return output
+
+
+# ==================== Checkpoint utilities for mHC ====================
+
+
+class HyperConnectionCheckpoint:
+ """
+ Checkpoint utility for mHC intermediate activations.
+
+ Implements the paper's "recomputing strategy" to reduce memory footprint
+ by discarding intermediate n-stream activations and recomputing on-the-fly.
+ """
+
+ @staticmethod
+ def compute_optimal_block_size(num_layers: int, num_streams: int) -> int:
+ """
+ Compute optimal recomputation block size.
+
+ From paper Eq. (20): L_r^* ≈ sqrt(nL/(n+2))
+
+ Args:
+ num_layers: Total number of transformer layers
+ num_streams: Number of residual streams (n)
+
+ Returns:
+ block_size: Optimal block size for checkpointing
+ """
+ block_size = int(math.sqrt(num_streams * num_layers / (num_streams + 2)))
+ return max(1, block_size)
diff --git a/megatron/core/transformer/linear_cross_entropy.py b/megatron/core/transformer/linear_cross_entropy.py
new file mode 100644
index 00000000000..e7afe326e1c
--- /dev/null
+++ b/megatron/core/transformer/linear_cross_entropy.py
@@ -0,0 +1,76 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+
+from typing import Literal, Optional, Tuple, Union
+
+import torch
+
+from megatron.core import tensor_parallel
+from megatron.core.fusions.fused_linear_cross_entropy import linear_cross_entropy
+
+
+class LinearCrossEntropyModule(tensor_parallel.ColumnParallelLinear):
+ """
+ A module that combines a ColumnParallelLinear layer with fused
+ linear + cross-entropy loss computation over a tensor-parallel vocabulary.
+ """
+
+ def forward(
+ self,
+ input_: torch.Tensor,
+ weight: Optional[torch.Tensor] = None,
+ runtime_gather_output: Optional[bool] = None,
+ output_cross_entropy_loss: bool = False,
+ labels: Optional[torch.Tensor] = None,
+ reduction: Literal["none", "sum", "mean"] = "none",
+ ignore_index: int = -100,
+ ) -> Union[torch.Tensor, Tuple[torch.Tensor, Optional[torch.Tensor]]]:
+ """Run either the plain ColumnParallelLinear or fused linear+cross-entropy."""
+ if output_cross_entropy_loss:
+ assert labels is not None, "labels cannot be None when outputting cross-entropy loss."
+ return self._compute_linear_and_cross_entropy_loss(
+ hidden=input_,
+ weight=weight if weight is not None else self.weight,
+ labels=labels,
+ reduction=reduction,
+ ignore_index=ignore_index,
+ )
+
+ # Fall back to standard ColumnParallelLinear forward.
+ # ColumnParallelLinear.forward returns (output, bias) or just output
+ # depending on configuration, so keep the return type as Tensor.
+ return super().forward(input_, weight, runtime_gather_output)
+
+ def _compute_linear_and_cross_entropy_loss(
+ self,
+ hidden: torch.Tensor,
+ weight: torch.Tensor,
+ labels: Optional[torch.Tensor] = None,
+ reduction: Literal["none", "sum", "mean"] = "none",
+ ignore_index: int = -100,
+ ) -> torch.Tensor:
+ """Compute fused linear + cross-entropy over tensor-parallel vocab."""
+ assert self.config.cross_entropy_loss_fusion, "Cross-entropy loss fusion must be enabled."
+ assert self.config.cross_entropy_fusion_impl == "linear", (
+ "Cross-entropy loss fusion implementation must be 'linear' to use "
+ "_compute_linear_and_cross_entropy_loss."
+ )
+ assert weight is not None, "weight cannot be None when using fused linear cross entropy."
+ assert labels is not None, "labels cannot be None when using fused linear cross entropy."
+
+ # [b s] => [s b]
+ labels = labels.transpose(0, 1).contiguous()
+ loss = linear_cross_entropy(
+ hidden,
+ weight,
+ labels,
+ sequence_parallel=self.sequence_parallel,
+ reduction=reduction,
+ ignore_index=ignore_index,
+ tp_group=self.tp_group,
+ )
+ # If reduction != "none" this will be a scalar; for "none" it should
+ # match [s, b] and can be reshaped back to [b, s].
+ if reduction == "none":
+ loss = loss.view_as(labels).transpose(0, 1).contiguous()
+
+ return loss
diff --git a/megatron/core/transformer/module.py b/megatron/core/transformer/module.py
index 6539ee36105..2d588262676 100644
--- a/megatron/core/transformer/module.py
+++ b/megatron/core/transformer/module.py
@@ -322,6 +322,15 @@ def _get_te_cuda_graph_replay_args(self, *args, **kwargs):
cudagraph_kwargs = kwargs.copy()
cudagraph_kwargs['is_first_microbatch'] = getattr(self, 'current_microbatch', 0) == 0
+ if self.config.fine_grained_activation_offloading and getattr(
+ self, 'offload_module_in_cuda_graph', False
+ ):
+ from megatron.core.pipeline_parallel.fine_grained_activation_offload import (
+ FineGrainedActivationOffloadingInterface as off_interface,
+ )
+
+ cudagraph_kwargs['cuda_graph_stream'] = off_interface.cuda_graph_stream()
+ cudagraph_kwargs['cuda_graph_event'] = off_interface.cuda_graph_event()
return cudagraph_args, cudagraph_kwargs
def _should_call_local_cudagraph(self, *args, **kwargs):
diff --git a/megatron/core/transformer/moe/experts.py b/megatron/core/transformer/moe/experts.py
index 63a2d074ce9..976c9df3cd6 100644
--- a/megatron/core/transformer/moe/experts.py
+++ b/megatron/core/transformer/moe/experts.py
@@ -5,6 +5,8 @@
from collections.abc import Callable
from copy import deepcopy
from dataclasses import dataclass
+from functools import partial
+from itertools import chain
from math import ceil
from typing import Optional, Protocol, Tuple
@@ -23,6 +25,12 @@
from megatron.core.pipeline_parallel.fine_grained_activation_offload import (
FineGrainedActivationOffloadingInterface as off_interface,
)
+from megatron.core.tensor_parallel.layers import (
+ _initialize_affine_weight_cpu,
+ _initialize_affine_weight_gpu,
+ set_tensor_model_parallel_attributes,
+)
+from megatron.core.tensor_parallel.utils import divide
from megatron.core.transformer.mlp import (
MLP,
MLPSubmodules,
@@ -33,6 +41,7 @@
from megatron.core.transformer.moe.moe_utils import (
ProcessGroupCollection,
get_align_size_for_quantization,
+ skip_routed_expert_padding,
)
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.utils import (
@@ -40,6 +49,7 @@
sharded_state_dict_default,
)
from megatron.core.typed_torch import apply_module, not_none
+from megatron.core.utils import is_te_min_version
if HAVE_TE:
from megatron.core.extensions.transformer_engine import Fp8Padding, Fp8Unpadding
@@ -64,6 +74,469 @@
logger = logging.getLogger(__name__)
+class GroupedMLP(MegatronModule):
+ """An efficient implementation of the Experts layer using GroupedGEMM.
+
+ Executes multiple experts in parallel to maximize computational efficiency.
+ """
+
+ # TODO(M4): breaking api, switched from pass in tp_group to pass in pg_collection.
+ def __init__(
+ self,
+ num_local_experts: int,
+ config: TransformerConfig,
+ pg_collection: Optional[ProcessGroupCollection] = None,
+ ):
+ super().__init__(config=config)
+ self.config: TransformerConfig = config
+ self.num_local_experts = num_local_experts
+ gg.assert_grouped_gemm_is_available()
+ assert (
+ config.add_bias_linear == False
+ ), "bias not supported in Grouped GEMM yet, please set '--disable-bias-linear' instead."
+ assert (
+ config.moe_latent_size is None
+ ), "MoE latent projection not supported in GroupedMLP yet."
+
+ self.expert_parallel = config.expert_model_parallel_size > 1
+ if self.config.gated_linear_unit:
+ if self.config.activation_func not in (F.silu, F.gelu):
+ raise ValueError("Activation function must be silu or gelu when using GroupedMLP.")
+
+ @jit_fuser
+ def glu(x):
+ x = torch.chunk(x, 2, dim=-1)
+ return self.config.activation_func(x[0]) * x[1]
+
+ self.activation_func = glu
+ else:
+ self.activation_func = self.config.activation_func
+ self.activation_recompute = (
+ self.config.recompute_granularity == 'selective'
+ and "moe_act" in self.config.recompute_modules
+ )
+ if self.activation_recompute and (self.config.fp8 or self.config.fp4):
+ raise ValueError(
+ "moe_act recompute for fp8 or fp4 cannot work with the legacy GroupedMLP."
+ )
+
+ @jit_fuser
+ def activation_func_with_probs(x, probs):
+ dtype = x.dtype
+ res = self.activation_func(x) * probs
+ return res.to(dtype)
+
+ self.activation_func_with_probs = activation_func_with_probs
+
+ self.ep_group = pg_collection.ep
+ # use pg_collection.expt_tp_group as tensor parallel group in this module.
+ self.tp_group = pg_collection.expt_tp
+ # use pg_collection.expt_dp_group as data parallel group in this module.
+ self.dp_group = pg_collection.expt_dp
+ # How many feature each rank holds for fc1 and fc2, respectively.
+ tp_size = self.tp_group.size()
+ tp_rank = self.tp_group.rank()
+
+ fc1_output_size = self.config.moe_ffn_hidden_size * self.num_local_experts
+ if config.gated_linear_unit:
+ # Project to 4h. If using swiglu double the output width,
+ # see https://arxiv.org/pdf/2002.05202.pdf
+ fc1_output_size *= 2
+ fc1_output_size_per_partition = divide(fc1_output_size, tp_size)
+
+ fc2_input_size = self.config.moe_ffn_hidden_size * self.num_local_experts
+ fc2_input_size_per_partition = divide(fc2_input_size, tp_size)
+
+ # Note: The current kernel implementations of grouped_gemm
+ # does not support transposition with CUTLASS grouped GEMM
+ # (https://github.com/fanshiqing/grouped_gemm/blob/main/csrc/grouped_gemm.cu#L355-L358)
+ # and as a result we avoid allocate the transpose of weights.
+ # Initialize weight.
+ if config.use_cpu_initialization:
+ self.weight1 = Parameter(
+ torch.empty(
+ self.config.hidden_size,
+ fc1_output_size_per_partition,
+ dtype=config.params_dtype,
+ )
+ )
+ self.weight2 = Parameter(
+ torch.empty(
+ fc2_input_size_per_partition, self.config.hidden_size, dtype=config.params_dtype
+ )
+ )
+ if config.perform_initialization:
+ _initialize_affine_weight_cpu(
+ self.weight1,
+ self.config.hidden_size,
+ fc1_output_size,
+ fc1_output_size_per_partition,
+ partition_dim=1,
+ init_method=config.init_method,
+ params_dtype=config.params_dtype,
+ rank=tp_rank,
+ world_size=tp_size,
+ )
+ _initialize_affine_weight_cpu(
+ self.weight2,
+ fc2_input_size,
+ self.config.hidden_size,
+ fc2_input_size_per_partition,
+ partition_dim=0,
+ init_method=config.output_layer_init_method,
+ params_dtype=config.params_dtype,
+ rank=tp_rank,
+ world_size=tp_size,
+ )
+ else:
+ # Ensure TP attrs are set even when not initializing
+ set_tensor_model_parallel_attributes(
+ tensor=self.weight1, is_parallel=True, dim=1, stride=1
+ )
+ set_tensor_model_parallel_attributes(
+ tensor=self.weight2, is_parallel=True, dim=0, stride=1
+ )
+ else:
+ self.weight1 = Parameter(
+ torch.empty(
+ self.config.hidden_size,
+ fc1_output_size_per_partition,
+ device=torch.cuda.current_device(),
+ dtype=config.params_dtype,
+ )
+ )
+ self.weight2 = Parameter(
+ torch.empty(
+ fc2_input_size_per_partition,
+ self.config.hidden_size,
+ device=torch.cuda.current_device(),
+ dtype=config.params_dtype,
+ )
+ )
+ if config.perform_initialization:
+ _initialize_affine_weight_gpu(
+ self.weight1, config.init_method, partition_dim=1, is_expert=True
+ )
+ _initialize_affine_weight_gpu(
+ self.weight2, config.output_layer_init_method, partition_dim=0, is_expert=True
+ )
+ else:
+ # Ensure TP attrs are set even when not initializing
+ set_tensor_model_parallel_attributes(
+ tensor=self.weight1, is_parallel=True, dim=1, stride=1
+ )
+ set_tensor_model_parallel_attributes(
+ tensor=self.weight2, is_parallel=True, dim=0, stride=1
+ )
+ setattr(self.weight1, 'allreduce', not self.expert_parallel)
+ setattr(self.weight2, 'allreduce', not self.expert_parallel)
+
+ def remove_extra_states_check(self, incompatible_keys):
+ """
+ Remove _extra_state from unexpected keys.
+ These keys are for dist ckpt compatibility with SequentialMLP.
+ """
+ keys = deepcopy(incompatible_keys.unexpected_keys)
+ for key in keys:
+ if '_extra_state' in key:
+ incompatible_keys.unexpected_keys.remove(key)
+
+ self.register_load_state_dict_post_hook(remove_extra_states_check)
+
+ def forward(
+ self,
+ permuted_local_hidden_states: torch.Tensor,
+ tokens_per_expert: torch.Tensor,
+ permuted_probs: torch.Tensor,
+ ):
+ """Forward step of the GroupedMLP."""
+ assert self.config.bf16, "Currently GroupedMLP for MoE only supports bf16."
+ if self.activation_recompute:
+ self.activation_checkpoint = tensor_parallel.CheckpointWithoutOutput()
+
+ if self.config.moe_apply_probs_on_input:
+ assert (
+ self.config.moe_router_topk == 1
+ ), "`moe_apply_probs_on_input` only works with `moe_router_topk`=1."
+ original_dtype = permuted_local_hidden_states.dtype
+ permuted_local_hidden_states = (
+ permuted_probs.unsqueeze(-1) * permuted_local_hidden_states
+ )
+ permuted_local_hidden_states = permuted_local_hidden_states.to(original_dtype)
+ # Probs already applied, so reset to 1.
+ permuted_probs = torch.ones_like(permuted_probs)
+
+ if permuted_local_hidden_states.nelement() != 0:
+ # Reshape the weights for the grouped GEMMs.
+ w1 = self.weight1.view(self.num_local_experts, self.config.hidden_size, -1)
+ w2 = self.weight2.view(self.num_local_experts, -1, self.config.hidden_size)
+
+ fc1_output = gg.ops.gmm(
+ permuted_local_hidden_states, w1, tokens_per_expert, trans_b=False
+ )
+ if self.activation_recompute:
+ intermediate_parallel = self.activation_checkpoint.checkpoint(
+ self.activation_func_with_probs, fc1_output, permuted_probs.unsqueeze(-1)
+ )
+ fc2_output = gg.ops.gmm(intermediate_parallel, w2, tokens_per_expert, trans_b=False)
+ self.activation_checkpoint.discard_output_and_register_recompute(fc2_output)
+ else:
+ intermediate_parallel = self.activation_func_with_probs(
+ fc1_output, permuted_probs.unsqueeze(-1)
+ )
+ fc2_output = gg.ops.gmm(intermediate_parallel, w2, tokens_per_expert, trans_b=False)
+ else:
+ # No token is allocated for local experts.
+ assert torch.count_nonzero(tokens_per_expert) == 0
+
+ # Make sure params of experts still have gradients even given zero tokens.
+ w1 = self.weight1.view(self.config.hidden_size, -1)
+ w2 = self.weight2.view(-1, self.config.hidden_size)
+ h = torch.matmul(permuted_local_hidden_states, w1)
+ if self.activation_recompute:
+ h = self.activation_checkpoint.checkpoint(
+ self.activation_func_with_probs, h, permuted_probs.unsqueeze(-1)
+ )
+ fc2_output = torch.matmul(h, w2)
+ self.activation_checkpoint.discard_output_and_register_recompute(fc2_output)
+ else:
+ h = self.activation_func_with_probs(h, permuted_probs.unsqueeze(-1))
+ fc2_output = torch.matmul(h, w2)
+
+ return fc2_output, None
+
+ def sharded_state_dict(self, prefix='', sharded_offsets=(), metadata=None):
+ """
+ Maps local expert to global experts.
+ The sharded_state_dict for the weight parts are compatible with the SequentialMLP,
+ whereas the optimizer states are not due to the limitation from weight transposing.
+ That is, for finetuning scenario, the checkpoint is compatible with the SequentialMLP.
+
+ When `singleton_local_shards` metadata flag is True, experts are broken down into
+ separate tensors and stored under separate global keys. Additionally, similarly to MLP,
+ layers with GLU activations are broken down into separate `w` and `v` tensors.
+ """
+ singleton_local_shards = (metadata or {}).get('singleton_local_shards', False)
+ sharded_state_dict = {}
+ ep_size = self.ep_group.size()
+ ep_rank = self.ep_group.rank()
+ tp_size = self.tp_group.size()
+ tp_rank = self.tp_group.rank()
+ dp_rank = self.dp_group.rank()
+ num_global_experts = ep_size * self.num_local_experts
+ local_expert_indices_offset = ep_rank * self.num_local_experts
+
+ prepend_axis_num = len(sharded_offsets)
+ replica_id = (0, 0, dp_rank)
+
+ local_ffn_dim_size = (
+ self.weight2.numel() // self.num_local_experts // self.config.hidden_size
+ )
+
+ def _break_into_individual_experts(
+ experts_ten: torch.Tensor,
+ key: str,
+ tp_offset: Tuple[int, int, int],
+ replica_id: ReplicaId,
+ ):
+ """Breaks experts into individual tensors and stores them under separate global keys"""
+ experts_state = []
+ assert len(experts_ten) == self.num_local_experts, (
+ experts_ten.shape,
+ self.num_local_experts,
+ )
+ for local_expert_idx, expert_ten in enumerate(experts_ten):
+ global_expert_idx = local_expert_indices_offset + local_expert_idx
+ expert_key = key.replace(
+ f'{prefix}experts.', f'{prefix}experts.{global_expert_idx}.'
+ )
+ experts_state.append(
+ ShardedTensor.from_rank_offsets(
+ expert_key,
+ expert_ten.contiguous(),
+ *sharded_offsets,
+ tp_offset,
+ replica_id=replica_id,
+ prepend_axis_num=prepend_axis_num,
+ )
+ )
+ return experts_state
+
+ @torch.no_grad()
+ def sh_ten_build_fn(
+ key: str,
+ t: torch.Tensor,
+ replica_id: ReplicaId,
+ flattened_range: Optional[slice],
+ tp_axis: int,
+ with_glu: bool,
+ ):
+ # TODO: write a generic implementation to cover both cases with and without GLU
+ if tp_axis == 1:
+ # weight1
+ if with_glu:
+ last_dim_size = local_ffn_dim_size * 2
+ else:
+ last_dim_size = local_ffn_dim_size
+ real_shape = (self.num_local_experts, self.config.hidden_size, last_dim_size)
+ elif tp_axis == 0:
+ # weight2
+ real_shape = (self.num_local_experts, local_ffn_dim_size, self.config.hidden_size)
+ assert with_glu == False
+ else:
+ raise ValueError("tp_axis should be 0 or 1.")
+ if flattened_range is None:
+ # weights
+ t = t.view(real_shape).transpose(-1, -2)
+ # change tp_axis due to the transposing
+ tp_axis = 1 - tp_axis
+ if with_glu:
+ assert tp_axis == 0, tp_axis
+ if singleton_local_shards:
+ w_tensor, v_tensor = torch.chunk(t, 2, -2)
+ w_key = f'{key}_w'
+ v_key = f'{key}_v'
+ sub_states = {
+ 'singleton_local_shards': LocalNonpersistentObject(True),
+ 'data': {
+ 'w': _break_into_individual_experts(
+ w_tensor,
+ w_key,
+ (prepend_axis_num, tp_rank, tp_size),
+ replica_id,
+ ),
+ 'v': _break_into_individual_experts(
+ v_tensor,
+ v_key,
+ (prepend_axis_num, tp_rank, tp_size),
+ replica_id,
+ ),
+ },
+ }
+ else:
+ local_tensors = torch.chunk(t, 2, -2)
+ sub_states = [
+ ShardedTensor.from_rank_offsets(
+ key,
+ local_tensors[0].contiguous(),
+ *sharded_offsets,
+ (prepend_axis_num, ep_rank, ep_size),
+ (prepend_axis_num + 1, tp_rank, tp_size * 2),
+ replica_id=replica_id,
+ prepend_axis_num=prepend_axis_num,
+ ),
+ ShardedTensor.from_rank_offsets(
+ key,
+ local_tensors[1].contiguous(),
+ *sharded_offsets,
+ (prepend_axis_num, ep_rank, ep_size),
+ (prepend_axis_num + 1, tp_size + tp_rank, tp_size * 2),
+ replica_id=replica_id,
+ prepend_axis_num=prepend_axis_num,
+ ),
+ ]
+ else:
+ if singleton_local_shards:
+ sub_states = {
+ 'singleton_local_shards': LocalNonpersistentObject(True),
+ 'data': _break_into_individual_experts(
+ t, key, (prepend_axis_num + tp_axis, tp_rank, tp_size), replica_id
+ ),
+ }
+ else:
+ sub_states = ShardedTensor.from_rank_offsets(
+ key,
+ t.contiguous(),
+ *sharded_offsets,
+ (prepend_axis_num, ep_rank, ep_size),
+ (prepend_axis_num + 1 + tp_axis, tp_rank, tp_size),
+ replica_id=replica_id,
+ prepend_axis_num=prepend_axis_num,
+ )
+ return sub_states # pylint: disable=possibly-used-before-assignment
+
+ @torch.no_grad()
+ def sh_ten_merge_fn(sub_state_dict, tp_axis: int, with_glu: bool):
+ if tp_axis == 1:
+ # weight1
+ weight_shape = (self.config.hidden_size, -1)
+ elif tp_axis == 0:
+ # weight2
+ weight_shape = (-1, self.config.hidden_size)
+ assert with_glu == False
+ else:
+ raise ValueError("tp_axis should be 0 or 1.")
+ if isinstance(sub_state_dict, dict):
+ assert sub_state_dict['singleton_local_shards']
+ if with_glu:
+ assert isinstance(sub_state_dict['data'], dict)
+ sub_state_dict = torch.cat(
+ (
+ torch.stack(sub_state_dict['data']['w']),
+ torch.stack(sub_state_dict['data']['v']),
+ ),
+ dim=-2,
+ )
+ else:
+ assert isinstance(sub_state_dict['data'], list)
+ sub_state_dict = torch.stack(sub_state_dict['data'])
+ else:
+ if with_glu:
+ sub_state_dict = torch.cat(sub_state_dict, -2)
+ return sub_state_dict.transpose(-1, -2).reshape(weight_shape)
+
+ state_dict = self.state_dict(prefix='', keep_vars=True)
+ for name, tensor in state_dict.items():
+ if name == 'weight1':
+ tp_axis = 1
+ with_glu = self.config.gated_linear_unit
+ wkey = f'{prefix}experts.linear_fc1.weight'
+ else:
+ tp_axis = 0
+ with_glu = False
+ wkey = f'{prefix}experts.linear_fc2.weight'
+
+ this_replica_id = list(copy.deepcopy(replica_id))
+
+ sharded_state_dict[f'{prefix}{name}'] = ShardedTensorFactory(
+ wkey,
+ tensor,
+ partial(sh_ten_build_fn, tp_axis=tp_axis, with_glu=with_glu),
+ partial(sh_ten_merge_fn, tp_axis=tp_axis, with_glu=with_glu),
+ tuple(this_replica_id),
+ )
+
+ replica_id = (0, tp_rank, dp_rank)
+ # Add fake _extra_state to be compatible with SequentialMLP
+ for expert_local_idx in range(self.num_local_experts):
+ expert_global_idx = local_expert_indices_offset + expert_local_idx
+ if singleton_local_shards:
+ expert_sharded_offsets = sharded_offsets
+ else:
+ expert_sharded_offsets = (
+ *sharded_offsets,
+ (len(sharded_offsets), expert_global_idx, num_global_experts),
+ )
+ for mod in ['linear_fc1', 'linear_fc2']:
+ if singleton_local_shards:
+ expert_key = f'{prefix}experts.{expert_global_idx}.{mod}._extra_state'
+ else:
+ expert_key = f'{prefix}experts.{mod}._extra_state'
+ sharded_state_dict[f'{prefix}expert{expert_global_idx}.{mod}._extra_state'] = (
+ make_sharded_object_for_checkpoint(
+ None, expert_key, expert_sharded_offsets, replica_id
+ )
+ )
+
+ return sharded_state_dict
+
+ def backward_dw(self):
+ """Performs backward pass for weight gradients in Experts.
+ Empty implementation for compatibility with SequentialMLP and TEGroupedMLP.
+ """
+ pass
+
+
class GroupedLinearFc1Interface(Protocol):
"""Interface for linear_fc1 module in TEGroupedMLP."""
@@ -237,15 +710,37 @@ def __init__(
set_save_original_input(self.linear_fc2)
# This is to avoid the CPU overhead of multiple d2h copies
- if self.offload_expert_fc1:
+ if self.offload_expert_fc1 and not self.config.fp8:
from megatron.core.extensions.transformer_engine import set_save_original_input
set_save_original_input(self.linear_fc1)
+ # Fused implementation with Transformer Engine op fuser API
+ if self.config.use_transformer_engine_op_fuser:
+ assert (
+ self._is_fused_impl_supported()
+ ), "Fused GroupedMLP is not supported for this configuration."
+ self._with_fused_impl: bool = self.config.use_transformer_engine_op_fuser
+ self._fused_ops: Optional[Tuple[torch.nn.Module]] = None
+ if (
+ self.config.gated_linear_unit
+ and self.config.moe_mlp_glu_interleave_size is not None
+ and not self._with_fused_impl
+ ):
+ logger.warning(
+ "`moe_mlp_glu_interleave_size=%s` is enabled, but fused MoE MLP implementation "
+ "is not supported for this configuration. The non-fused path may incur extra "
+ "tensor reordering/copy overhead each forward pass.",
+ self.config.moe_mlp_glu_interleave_size,
+ )
+
if self.config.fp8 or self.config.fp4:
assert HAVE_TE, "FP8 and FP4 requires TE."
- self.quantization_padding = Fp8Padding(self.num_local_experts)
- self.quantization_unpadding = Fp8Unpadding(self.num_local_experts)
+ align_size = 256 if self._with_fused_impl else None
+ self.quantization_padding = Fp8Padding(self.num_local_experts, align_size=align_size)
+ self.quantization_unpadding = Fp8Unpadding(
+ self.num_local_experts, align_size=align_size
+ )
@staticmethod
def _apply_bias(intermediate_parallel, bias_parallel, tokens_per_expert, permuted_probs):
@@ -267,6 +762,201 @@ def _apply_bias(intermediate_parallel, bias_parallel, tokens_per_expert, permute
.to(intermediate_parallel.dtype)
)
+ def _is_fused_impl_supported(self) -> bool:
+ """Check if the TE op fuser supports implementing this module."""
+
+ # Check Transformer Engine installation
+ if not HAVE_TE:
+ return False # Transformer Engine is not available
+ try:
+ from transformer_engine.pytorch.ops import GroupedLinear, ScaledSwiGLU
+ except ImportError:
+ return False # Transformer Engine version is too old
+
+ if not is_te_min_version("2.14.0"):
+ return False
+
+ # Check for unsupported features
+ if self.tp_group.size() > 1:
+ return False # Tensor parallelism is not supported
+ if self.offload_expert_fc1 or self.offload_moe_act:
+ return False # Fine-grained activation offloading is not supported
+ if self.config.moe_apply_probs_on_input:
+ return False # Pre-multiplying probs is not supported
+
+ # Check grouped linear modules
+ if not isinstance(self.linear_fc1, te.pytorch.GroupedLinear):
+ return False
+ if not isinstance(self.linear_fc2, te.pytorch.GroupedLinear):
+ return False
+
+ # Check activation
+ if self.activation_func != F.silu or not self.config.gated_linear_unit:
+ return False # Expected SwiGLU activation
+
+ return True
+
+ def _make_fused_ops(self) -> torch.nn.Module:
+ """Construct fused module for FC1, activation, and FC2."""
+
+ # Container for fusible ops
+ ops = te.pytorch.ops.Sequential()
+
+ # Check if there are 1 or "num_gemms" params in the GroupedLinear module.
+ fc1_single_grouped_weight = self.linear_fc1.single_grouped_weight
+ fc1_weight_dtype = (
+ self.linear_fc1.weight.dtype
+ if fc1_single_grouped_weight
+ else self.linear_fc1.weight0.dtype
+ )
+ fc2_single_grouped_weight = self.linear_fc2.single_grouped_weight
+ fc2_weight_dtype = (
+ self.linear_fc2.weight.dtype
+ if fc2_single_grouped_weight
+ else self.linear_fc2.weight0.dtype
+ )
+ fc1_single_grouped_bias = self.linear_fc1.single_grouped_bias
+ fc2_single_grouped_bias = self.linear_fc2.single_grouped_bias
+
+ # TODO:ksivamani: Why meta device?
+ op = te.pytorch.ops.GroupedLinear(
+ self.linear_fc1.num_gemms,
+ self.linear_fc1.in_features,
+ self.linear_fc1.out_features,
+ bias=self.linear_fc1.use_bias,
+ device=torch.cuda.current_device(),
+ dtype=fc1_weight_dtype,
+ accumulate_into_main_grad=self.linear_fc1.fuse_wgrad_accumulation,
+ single_grouped_weight=fc1_single_grouped_weight,
+ single_grouped_bias=fc1_single_grouped_bias,
+ delay_wgrad_compute=self.config.delay_wgrad_compute,
+ )
+
+ # Copy the weights from GroupedLinear module to GroupedLinear op.
+ if fc1_single_grouped_weight:
+ setattr(op, "weight", getattr(self.linear_fc1, "weight"))
+
+ for idx in range(self.linear_fc1.num_gemms):
+ if not fc1_single_grouped_weight:
+ setattr(op, f"weight{idx}", getattr(self.linear_fc1, f"weight{idx}"))
+ if self.linear_fc1.use_bias and not fc1_single_grouped_bias:
+ setattr(op, f"bias{idx}", getattr(self.linear_fc1, f"bias{idx}"))
+ if self.linear_fc1.use_bias and fc1_single_grouped_bias:
+ setattr(op, "bias", getattr(self.linear_fc1, "bias"))
+ ops.append(op)
+
+ # Activation and post-multiply probs
+ op = te.pytorch.ops.ScaledSwiGLU(
+ glu_interleave_size=self.config.moe_mlp_glu_interleave_size
+ )
+ ops.append(op)
+
+ # FC2
+ op = te.pytorch.ops.GroupedLinear(
+ self.linear_fc2.num_gemms,
+ self.linear_fc2.in_features,
+ self.linear_fc2.out_features,
+ bias=self.linear_fc2.use_bias,
+ device=torch.cuda.current_device(),
+ dtype=fc2_weight_dtype,
+ accumulate_into_main_grad=self.linear_fc2.fuse_wgrad_accumulation,
+ single_grouped_weight=fc2_single_grouped_weight,
+ single_grouped_bias=fc2_single_grouped_bias,
+ delay_wgrad_compute=self.config.delay_wgrad_compute,
+ )
+
+ # Copy the weights from GroupedLinear module to GroupedLinear op.
+ if fc2_single_grouped_weight:
+ setattr(op, "weight", getattr(self.linear_fc2, "weight"))
+
+ for idx in range(self.linear_fc2.num_gemms):
+ if not fc2_single_grouped_weight:
+ setattr(op, f"weight{idx}", getattr(self.linear_fc2, f"weight{idx}"))
+ if self.linear_fc2.use_bias and not fc2_single_grouped_bias:
+ setattr(op, f"bias{idx}", getattr(self.linear_fc2, f"bias{idx}"))
+ if self.linear_fc2.use_bias and fc2_single_grouped_bias:
+ setattr(op, "bias", getattr(self.linear_fc2, "bias"))
+ ops.append(op)
+
+ # Emulate submodule pre-forward hooks
+ ops.register_forward_pre_hook(self._make_fused_impl_pre_forward_hook())
+
+ return ops
+
+ def _make_fused_impl_pre_forward_hook(self) -> Callable:
+ """Make function that calls submodule pre-forward callback hooks.
+
+ This is intended for compatibility with
+ DistributedDataParallel hooks that trigger parameter
+ all-gathers. It does not support general pre-forward hooks
+ since they may manipulate intermediate tensors that are never
+ instantiated by the fused implementation.
+
+ """
+
+ def forward_pre_hook(module, *_) -> None:
+ for submodule in chain(self.linear_fc1.modules(), self.linear_fc2.modules()):
+ for hook in submodule._forward_pre_hooks.values():
+ # Assume that hook does not interact with input
+ ret = hook(submodule, None)
+ if ret is not None:
+ raise RuntimeError(
+ f"Applying a fused implementation for {self.__class__.__name__}, "
+ f"but a {submodule.__class__.__name__} submodule "
+ "has a pre-forward hook that modifies the input tensor."
+ )
+
+ return forward_pre_hook
+
+ def _fused_forward(
+ self,
+ permuted_local_hidden_states: torch.Tensor,
+ tokens_per_expert: torch.Tensor,
+ permuted_probs: torch.Tensor,
+ ) -> torch.Tensor:
+ """Forward pass using Transformer Engine operation fuser API."""
+
+ # Construct fused impl if needed
+ # Note: We initialize during the first forward pass in case
+ # the params are modified after the constructor.
+ # Note: The fused impl is stored in a tuple to avoid
+ # registering submodules.
+ if self._fused_ops is None:
+ self._fused_ops = (self._make_fused_ops(),)
+ (ops,) = self._fused_ops
+
+ # Apply padding if needed
+ unpadded_tokens_per_expert = None
+ if skip_routed_expert_padding(self.config):
+ pass
+ elif self.config.fp8 or self.config.fp4:
+ tokens_per_expert = tokens_per_expert.tolist()
+ unpadded_tokens_per_expert = tokens_per_expert
+ permuted_local_hidden_states, tokens_per_expert = self.quantization_padding(
+ permuted_local_hidden_states, tokens_per_expert
+ )
+ permuted_probs, _ = self.quantization_padding(
+ permuted_probs.unsqueeze(-1), unpadded_tokens_per_expert
+ )
+ permuted_probs = permuted_probs.squeeze(-1)
+ tokens_per_expert = torch.tensor(
+ tokens_per_expert, dtype=torch.int, device=permuted_probs.device
+ )
+
+ # Call fused impl
+ output = ops(
+ permuted_local_hidden_states,
+ tokens_per_expert, # FC1
+ permuted_probs, # Scaled SwiGLU
+ tokens_per_expert, # FC2
+ )
+
+ # Remove padding if needed
+ if unpadded_tokens_per_expert is not None:
+ output = self.quantization_unpadding(output, unpadded_tokens_per_expert)
+
+ return output
+
def bias_act_func(self, intermediate_parallel, bias_parallel, permuted_probs):
"""
Applies bias and activation function to the output of linear_fc1.
@@ -341,17 +1031,29 @@ def forward(
Return:
output (torch.Tensor): The output of the local experts.
"""
+
+ # Call fused impl if enabled
+ if self._with_fused_impl:
+ output = self._fused_forward(
+ permuted_local_hidden_states, tokens_per_expert, permuted_probs
+ )
+ output_bias = None
+ return output, output_bias
+
+ # Apply padding if needed
+ unpadded_tokens_per_expert = None
tokens_per_expert: list[int] = tokens_per_expert.tolist()
- if self.config.fp8 or self.config.fp4:
- actual_tokens_per_expert = tokens_per_expert
+ permuted_probs = permuted_probs.unsqueeze(-1)
+ if skip_routed_expert_padding(self.config):
+ pass
+ elif self.config.fp8 or self.config.fp4:
+ unpadded_tokens_per_expert = tokens_per_expert
permuted_local_hidden_states, tokens_per_expert = self.quantization_padding(
permuted_local_hidden_states, tokens_per_expert
)
permuted_probs, _ = self.quantization_padding(
- permuted_probs.unsqueeze(-1), actual_tokens_per_expert
+ permuted_probs, unpadded_tokens_per_expert
)
- else:
- permuted_probs = permuted_probs.unsqueeze(-1)
if self.config.moe_apply_probs_on_input:
assert (
@@ -363,43 +1065,129 @@ def forward(
# Probs already applied, so reset to 1.
permuted_probs = torch.ones_like(permuted_probs)
- with off_interface(
+ expert_fc1_manager = off_interface(
self.offload_expert_fc1, permuted_local_hidden_states, "expert_fc1"
- ) as permuted_local_hidden_states:
+ )
+ with expert_fc1_manager as permuted_local_hidden_states:
fc1_output, bias_parallel = apply_module(self.linear_fc1)(
permuted_local_hidden_states, tokens_per_expert
)
- if self.offload_expert_fc1:
- fc1_output = off_interface.group_commit(
- fc1_output,
- name="expert_fc1",
- forced_released_tensors=[permuted_local_hidden_states],
+ fc1_output = expert_fc1_manager.group_offload(
+ fc1_output,
+ forced_released_tensors=[permuted_local_hidden_states],
+ delay_offload=self.config.delay_offload_until_cuda_graph,
+ )
+
+ def bias_act_func(intermediate_parallel, bias_parallel, permuted_probs):
+
+ # Whether activation function is interleaved GLU
+ with_glu_interleaving = (
+ self.config.gated_linear_unit
+ and self.config.moe_mlp_glu_interleave_size is not None
)
+ def remove_glu_interleaving(x: torch.Tensor) -> torch.Tensor:
+ """Reorder tensor so gate and linear units are contiguous.
+
+ Should only be applied if the activation function is
+ an interleaved GLU.
+
+ """
+ shape = x.size()
+ interleave_size = self.config.moe_mlp_glu_interleave_size
+ x = x.reshape(-1, shape[-1] // (2 * interleave_size), 2, interleave_size)
+ x = x.transpose(1, 2).contiguous()
+ x = x.view(shape)
+ return x
+
+ if self.config.use_te_activation_func:
+ if bias_parallel is not None:
+ intermediate_parallel = intermediate_parallel + bias_parallel
+ if with_glu_interleaving:
+ intermediate_parallel = remove_glu_interleaving(intermediate_parallel)
+ intermediate_parallel = self.activation_func(intermediate_parallel)
+ if permuted_probs is not None:
+ original_dtype = intermediate_parallel.dtype
+ intermediate_parallel = intermediate_parallel * permuted_probs
+ intermediate_parallel = intermediate_parallel.to(original_dtype)
+ elif self.config.bias_activation_fusion and not with_glu_interleaving:
+ if self.activation_func == F.silu and self.config.gated_linear_unit:
+ # dtype is handled inside the fused kernel
+ intermediate_parallel = weighted_bias_swiglu_impl(
+ intermediate_parallel,
+ bias_parallel,
+ permuted_probs,
+ self.config.activation_func_fp8_input_store,
+ )
+ elif self.activation_func == quick_gelu and self.config.gated_linear_unit:
+ intermediate_parallel = weighted_bias_quick_geglu_impl(
+ intermediate_parallel,
+ bias_parallel,
+ permuted_probs,
+ self.config.activation_func_fp8_input_store,
+ self.config.glu_linear_offset,
+ self.config.activation_func_clamp_value,
+ )
+ else:
+ raise ValueError(
+ "Only support fusion of swiglu and quick_gelu in TEGroupedMLP."
+ )
+ elif (
+ self.activation_func == squared_relu and self.config.use_fused_weighted_squared_relu
+ ):
+ assert bias_parallel is None
+ intermediate_parallel = weighted_squared_relu_impl(
+ intermediate_parallel, permuted_probs
+ )
+ else:
+ if self.config.gated_linear_unit:
+
+ def glu(x):
+ if with_glu_interleaving:
+ x = remove_glu_interleaving(x)
+ x_glu, x_linear = torch.chunk(x, 2, dim=-1)
+ if (val := self.config.activation_func_clamp_value) is not None:
+ x_glu = x_glu.clamp(min=None, max=val)
+ x_linear = x_linear.clamp(min=-val, max=val)
+ return self.config.activation_func(x_glu) * (
+ x_linear + self.config.glu_linear_offset
+ )
+
+ intermediate_parallel = glu(intermediate_parallel)
+ else:
+ intermediate_parallel = self.activation_func(intermediate_parallel)
+ original_dtype = intermediate_parallel.dtype
+ intermediate_parallel = intermediate_parallel * permuted_probs
+ intermediate_parallel = intermediate_parallel.to(original_dtype)
+ return intermediate_parallel
+
+ moe_act_manager = off_interface(self.offload_moe_act, fc1_output, "moe_act")
if self.activation_recompute:
self.activation_checkpoint = tensor_parallel.CheckpointWithoutOutput()
- with off_interface(self.offload_moe_act, fc1_output, "moe_act") as fc1_output:
+ with moe_act_manager as fc1_output:
bias_act_output = self.activation_checkpoint.checkpoint(
self.bias_act_func, fc1_output, bias_parallel, permuted_probs
)
else:
- with off_interface(self.offload_moe_act, fc1_output, "moe_act") as fc1_output:
- bias_act_output = self.bias_act_func(fc1_output, bias_parallel, permuted_probs)
+ with moe_act_manager as fc1_output:
+ bias_act_output = bias_act_func(fc1_output, bias_parallel, permuted_probs)
+
output, output_bias = apply_module(self.linear_fc2)(bias_act_output, tokens_per_expert)
if self.activation_recompute:
self.activation_checkpoint.discard_output_and_register_recompute(output)
# Delay the offload of the moe act until after the linear_fc2 has been computed
# to make sure the fc1_output is reloaded to GPU before recomputing moe_act.
- if self.offload_moe_act:
- output = off_interface.group_commit(
- output, name="moe_act", forced_released_tensors=[fc1_output]
- )
+ output = moe_act_manager.group_offload(
+ output,
+ forced_released_tensors=[fc1_output],
+ delay_offload=self.config.delay_offload_until_cuda_graph,
+ )
output = self._apply_bias(output, output_bias, tokens_per_expert, permuted_probs)
# upad and concat the output
- if self.config.fp8 or self.config.fp4:
- output = self.quantization_unpadding(output, actual_tokens_per_expert)
+ if unpadded_tokens_per_expert is not None:
+ output = self.quantization_unpadding(output, unpadded_tokens_per_expert)
output_bias = None
@@ -453,6 +1241,14 @@ def backward_dw(self):
If an error occurs during execution, it is caught and re-raised with a
descriptive message.
"""
+ if self._with_fused_impl and self.config.delay_wgrad_compute:
+ if self._fused_ops is not None:
+ (seq,) = self._fused_ops
+ fused_children = list(seq.children())
+ assert len(fused_children) >= 3, "expected FC1, activation, FC2 in fused TE ops"
+ fused_children[2].backward_dw()
+ fused_children[0].backward_dw()
+ return
self.linear_fc2.backward_dw()
self.linear_fc1.backward_dw()
diff --git a/megatron/core/transformer/moe/moe_layer.py b/megatron/core/transformer/moe/moe_layer.py
index 85f2df2e043..9aa50653630 100644
--- a/megatron/core/transformer/moe/moe_layer.py
+++ b/megatron/core/transformer/moe/moe_layer.py
@@ -339,6 +339,19 @@ def __init__(
self.cudagraph_tensor_store = MoECudaGraphTensorStore()
self.fwd_execution_map = ["route", "expert_compute", "postprocess"]
+ # Setup events and streams for delayed wgrad computation.
+ self.setup_delayed_wgrad_for_dispatch_backward_overlap()
+
+ def setup_delayed_wgrad_for_dispatch_backward_overlap(self):
+ """Initializes CUDA events and streams for overlapping expert
+ weight gradient computation with dispatch backward.
+ """
+ self._delayed_wgrad_event: Optional[torch.cuda.Event] = None
+ self._delayed_wgrad_stream: Optional[torch.cuda.Stream] = None
+ if self.config.overlap_dispatch_backward_with_experts_wgrad:
+ self._delayed_wgrad_event = torch.cuda.Event()
+ self._delayed_wgrad_stream = torch.cuda.Stream(device="cuda")
+
def _setup_inference_mode(self, pg_collection):
"""Set up inference-optimized token dispatcher and state.
@@ -429,6 +442,8 @@ def dispatch(self, hidden_states: torch.Tensor, probs: torch.Tensor):
tokens and their associated probabilities to the devices hosting their assigned
experts.
"""
+ if self.config.overlap_dispatch_backward_with_experts_wgrad:
+ hidden_states = _RegisterDelayedWgradForExperts.apply(self, hidden_states)
return self.token_dispatcher.token_dispatch(hidden_states, probs)
@maybe_skip_or_early_return_by_cudagraph("shared_experts_compute")
@@ -467,6 +482,10 @@ def routed_experts_compute(self, hidden_states: torch.Tensor, probs: torch.Tenso
for each expert. It then passes the tokens through the local experts.
The output from the experts is preprocessed for the combine step.
"""
+ if self.config.overlap_dispatch_backward_with_experts_wgrad:
+ hidden_states = _RecordExpertDgradCompletion.apply(
+ self._delayed_wgrad_event, hidden_states
+ )
dispatched_input, tokens_per_expert, permuted_probs = (
self.token_dispatcher.dispatch_postprocess(hidden_states, probs)
)
@@ -612,24 +631,24 @@ def custom_forward(hidden_states, intermediate_tensors=None, padding_mask=None):
def backward_dw(self, routed_experts: bool = True, shared_experts: bool = False):
"""Compute weight gradients for experts and shared experts."""
+ from megatron.core.pipeline_parallel.utils import get_comm_stream
+
# TODO(Wohox): replace the "routed_experts" and "shared_experts" arguments with better
# naming to better explain that they are actually from different fine-grained callables,
# or use scanning to decide which backward_dw should be called.
if routed_experts:
self.experts.backward_dw()
- if self.config.moe_latent_size:
+ if self.config.moe_latent_size and self.config.overlap_moe_expert_parallel_comm:
# TODO(Wohox): fc2_latent_proj forward and backward are executed in comm stream,
# so we execute its backward_dw in the comm stream too. But this may harm the
# EP overlap performance. Better to check if there is a better way to handle this.
- from megatron.core.pipeline_parallel.utils import get_comm_stream
-
comm_stream = get_comm_stream()
with torch.cuda.stream(comm_stream):
self.fc2_latent_proj.backward_dw()
if shared_experts:
if self.use_shared_expert and not self.shared_expert_overlap:
self.shared_experts.backward_dw()
- if self.config.moe_latent_size:
+ if self.config.moe_latent_size and self.config.overlap_moe_expert_parallel_comm:
self.fc1_latent_proj.backward_dw()
def set_for_recompute_pre_mlp_layernorm(self):
@@ -640,3 +659,66 @@ def set_for_recompute_pre_mlp_layernorm(self):
from megatron.core.extensions.transformer_engine import set_save_original_input
set_save_original_input(self.shared_experts.linear_fc1)
+
+
+class _RecordExpertDgradCompletion(torch.autograd.Function):
+ """Autograd function that records a CUDA event when expert data gradients finish.
+
+ Placed in the forward graph just before the expert computation so that during
+ the backward pass, when the expert dgrad completes, we record an event. The
+ subsequent ``_RegisterDelayedWgradForExperts`` waits on this event before
+ launching the delayed wgrad computation on a separate CUDA stream.
+ """
+
+ @staticmethod
+ def forward(ctx, event: torch.cuda.Event, *inputs):
+ """Forward pass that stores the event and passes through inputs unchanged."""
+ ctx.event = event
+ return inputs[0] if len(inputs) == 1 else inputs
+
+ @staticmethod
+ def backward(ctx, *grad_outputs):
+ """Backward pass that records the event when expert dgrad completes."""
+ ctx.event.record(torch.cuda.current_stream())
+ ctx.event = None
+ return (None,) + grad_outputs
+
+
+class _RegisterDelayedWgradForExperts(torch.autograd.Function):
+ """Autograd function that orchestrates delayed wgrad computation for MoE experts.
+
+ Placed in the forward graph at the dispatch boundary. During the backward pass,
+ this function:
+ 1. Records an event on the current (backward) stream to signal the dgrad is done.
+ 2. Executes the delayed wgrad computation on a dedicated CUDA stream.
+ 3. Waits for the wgrad computation to complete.
+ 4. Invokes the registered gradient processing callback (e.g., FSDP reduce-scatter).
+ """
+
+ @staticmethod
+ def forward(ctx, module: MoELayer, *inputs):
+ """Forward pass that stores the MoE module and passes through inputs unchanged."""
+ ctx.module = module
+ return inputs[0] if len(inputs) == 1 else inputs
+
+ @staticmethod
+ def backward(ctx, *grad_outputs):
+ """Backward pass that executes delayed wgrad computation on a separate stream."""
+ module = ctx.module
+ event = module._delayed_wgrad_event
+ wgrad_stream = module._delayed_wgrad_stream
+
+ wgrad_stream.wait_event(event)
+ with torch.cuda.stream(wgrad_stream):
+ with torch.cuda.nvtx.range("delayed_expert_wgrad"):
+ module.backward_dw(routed_experts=True, shared_experts=False)
+ event.record(wgrad_stream)
+
+ torch.cuda.current_stream().wait_event(event)
+
+ for param in module.parameters():
+ if getattr(param, "post_wgrad_grad_acc_hook", None) is not None:
+ param.post_wgrad_grad_acc_hook()
+
+ ctx.module = None
+ return (None,) + grad_outputs
diff --git a/megatron/core/transformer/moe/moe_logging.py b/megatron/core/transformer/moe/moe_logging.py
new file mode 100644
index 00000000000..b1f2b27000b
--- /dev/null
+++ b/megatron/core/transformer/moe/moe_logging.py
@@ -0,0 +1,379 @@
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+"""MoE metrics tracking and logging.
+
+Collects per-layer MoE metrics during forward passes, synchronizes them across
+distributed ranks, and writes scalar summaries to TensorBoard / W&B.
+
+Usage:
+ tracker = get_moe_metrics_tracker()
+
+ # In router forward pass:
+ tracker.record("load_balancing_loss", loss, layer_number=1, num_layers=32,
+ reduce_group=tp_cp_group)
+
+ # At end of training step:
+ log_str = tracker.report(
+ loss_scale=1 / num_microbatches,
+ iteration=step,
+ writer=tb_writer,
+ num_layers=32,
+ )
+"""
+
+from dataclasses import dataclass
+from typing import Dict, List, Optional, Union
+
+import torch
+
+from megatron.core import parallel_state
+from megatron.core.process_groups_config import ProcessGroupCollection
+
+
+@dataclass
+class MetricEntry:
+ """Per-layer metric with distributed reduction configuration."""
+
+ values: torch.Tensor
+ reduce_group: Optional[torch.distributed.ProcessGroup] = None
+ avg_group: Optional[torch.distributed.ProcessGroup] = None
+ needs_dp_avg: bool = True
+
+
+# ---------------------------------------------------------------------------
+# Module-level global tracker (follows parallel_state / global_vars pattern)
+# ---------------------------------------------------------------------------
+_MOE_METRICS_TRACKER: Optional['MoEMetricsTracker'] = None
+
+
+def get_moe_metrics_tracker() -> 'MoEMetricsTracker':
+ """Return the global MoE metrics tracker, creating it lazily if needed."""
+ global _MOE_METRICS_TRACKER
+ if _MOE_METRICS_TRACKER is None:
+ _MOE_METRICS_TRACKER = MoEMetricsTracker()
+ return _MOE_METRICS_TRACKER
+
+
+def set_moe_metrics_tracker(tracker: 'MoEMetricsTracker') -> None:
+ """Set the global MoE metrics tracker."""
+ global _MOE_METRICS_TRACKER
+ _MOE_METRICS_TRACKER = tracker
+
+
+def destroy_moe_metrics_tracker() -> None:
+ """Reset the global MoE metrics tracker to ``None``."""
+ global _MOE_METRICS_TRACKER
+ _MOE_METRICS_TRACKER = None
+
+
+class MoEMetricsTracker:
+ """Tracker for MoE layer-wise metrics.
+
+ Lifecycle: ``record()`` per-layer values during forward → ``report()`` at
+ step end (sync, aggregate, log, clear) → repeat.
+
+ Example:
+ tracker = get_moe_metrics_tracker()
+ tracker.record("load_balancing_loss", loss, layer_number=1, num_layers=32)
+ log_str = tracker.report(loss_scale=1/8, iteration=100, writer=tb_writer,
+ num_layers=32)
+ """
+
+ def __init__(self):
+ self._metrics: Dict[str, MetricEntry] = {}
+
+ # =========================================================================
+ # Public API
+ # =========================================================================
+
+ @property
+ def metrics(self) -> Dict[str, MetricEntry]:
+ """Read-only access to the underlying metric entries."""
+ return self._metrics
+
+ def record(
+ self,
+ name: str,
+ value: torch.Tensor,
+ layer_number: int,
+ num_layers: int,
+ reduce_group: Optional[torch.distributed.ProcessGroup] = None,
+ avg_group: Optional[torch.distributed.ProcessGroup] = None,
+ needs_dp_avg: bool = True,
+ ) -> None:
+ """Accumulate a metric value for a specific layer.
+
+ Called during the router forward pass. Lazily creates the metric entry
+ on first call for each metric name.
+
+ Args:
+ name: Metric name (e.g. ``"load_balancing_loss"``).
+ value: Scalar tensor to accumulate (will be detached).
+ layer_number: 1-based layer index.
+ num_layers: Total number of layers (determines tensor size).
+ reduce_group: Process group for sum-reduction (e.g. tp_cp_group).
+ avg_group: Process group for average-reduction.
+ needs_dp_avg: Whether to average across DP ranks after other reductions.
+ """
+ if layer_number is None:
+ return
+
+ if name not in self._metrics:
+ self._metrics[name] = MetricEntry(values=torch.zeros(num_layers, device=value.device))
+
+ entry = self._metrics[name]
+ entry.values[layer_number - 1] += value.detach()
+ entry.reduce_group = reduce_group
+ entry.avg_group = avg_group
+ entry.needs_dp_avg = needs_dp_avg
+
+ def report(
+ self,
+ loss_scale: float,
+ iteration: int,
+ writer=None,
+ wandb_writer=None,
+ per_layer_logging: bool = False,
+ force_initialize: bool = False,
+ track_names: Optional[Union[str, List[str]]] = None,
+ num_layers: Optional[int] = None,
+ moe_layer_freq: Optional[Union[int, List[int]]] = None,
+ mtp_num_layers: Optional[int] = None,
+ total_loss_dict: Optional[dict[str, torch.Tensor]] = None,
+ percentiles: Optional[Dict[str, List[float]]] = None,
+ pg_collection: Optional[ProcessGroupCollection] = None,
+ ) -> str:
+ """Sync metrics across ranks, aggregate, log, and clear.
+
+ This is the main entry point called once per training step. It pairs
+ with :meth:`record`: you *record* individual data points during forward,
+ then *report* the summary at step end.
+
+ Args:
+ loss_scale: Scale factor for averaging across microbatches
+ (usually ``1 / num_microbatches``).
+ iteration: Current training iteration.
+ writer: TensorBoard ``SummaryWriter`` (optional).
+ wandb_writer: Weights & Biases run object (optional).
+ per_layer_logging: Whether to also write per-layer values.
+ force_initialize: If True, pre-create metric entries for *track_names*
+ that don't exist yet. Required for PP ranks without MoE layers
+ whose tensor sizes must match ranks that do have MoE layers.
+ track_names: Metric name(s) to report. ``None`` reports all.
+ num_layers: Total transformer layers (required when *force_initialize*).
+ moe_layer_freq: MoE layer frequency or binary pattern list.
+ mtp_num_layers: Extra layers from Multi-Token Prediction.
+ total_loss_dict: Megatron training-loop accumulator. Metrics
+ ending with ``"loss"`` are accumulated here and excluded from
+ the returned console log string.
+ percentiles: Per-metric percentiles to compute, e.g.
+ ``{"load_imbalance": [0.5, 0.95]}``.
+ pg_collection: Custom process-group collection for reduction.
+
+ Returns:
+ Formatted log string for console output.
+ """
+ metric_names = self._resolve_names(track_names)
+
+ # Pre-create entries on PP ranks that lack MoE layers.
+ # Tensor size must be (num_layers + mtp_num_layers) to match ranks that
+ # recorded via record(), otherwise all_reduce across PP will hang.
+ if force_initialize:
+ if num_layers is None:
+ raise ValueError("num_layers must be provided when force_initialize=True.")
+ init_size = num_layers + (mtp_num_layers or 0)
+ for name in metric_names:
+ self.ensure_initialized(name, init_size)
+
+ self._sync_metrics(metric_names, pg_collection)
+
+ num_moe_layers = self._count_moe_layers(num_layers, moe_layer_freq, mtp_num_layers)
+ scalars = self._aggregate(loss_scale, num_moe_layers, metric_names, percentiles)
+
+ # Megatron integration: accumulate loss metrics into total_loss_dict
+ console_scalars = dict(scalars)
+ if total_loss_dict is not None:
+ for k, v in scalars.items():
+ if k.lower().endswith("loss"):
+ if k in total_loss_dict:
+ total_loss_dict[k] += v
+ else:
+ total_loss_dict[k] = v
+ console_scalars.pop(k)
+
+ self._log_scalars(scalars, iteration, writer, wandb_writer)
+ if per_layer_logging:
+ self._log_per_layer(
+ loss_scale, metric_names, iteration, writer, wandb_writer, percentiles
+ )
+
+ log_string = self._format(console_scalars)
+ self.clear()
+ return log_string
+
+ def clear(self) -> None:
+ """Zero out all metric values (entries are kept for reuse)."""
+ for entry in self._metrics.values():
+ entry.values.zero_()
+
+ def ensure_initialized(
+ self, name: str, num_layers: int, device: Optional[Union[str, torch.device, int]] = None
+ ) -> None:
+ """Pre-create a metric entry if it does not already exist.
+
+ This is needed for PP ranks that have no MoE layers -- their tensor
+ size must match ranks that do, otherwise ``all_reduce`` across PP hangs.
+
+ Args:
+ name: Metric name.
+ num_layers: Tensor size (should include MTP layers).
+ device: Device for the zero tensor. Defaults to current CUDA device.
+ """
+ if name not in self._metrics:
+ if device is None:
+ device = torch.cuda.current_device() if torch.cuda.is_available() else "cpu"
+ self._metrics[name] = MetricEntry(values=torch.zeros(num_layers, device=device))
+
+ # =========================================================================
+ # Private implementation
+ # =========================================================================
+
+ def _resolve_names(self, track_names: Optional[Union[str, List[str]]]) -> List[str]:
+ """Normalize *track_names* argument to a list of strings."""
+ if track_names is None:
+ return list(self._metrics.keys())
+ if isinstance(track_names, str):
+ return [track_names]
+ return track_names
+
+ def _sync_metrics(
+ self, metric_names: List[str], pg_collection: Optional[ProcessGroupCollection] = None
+ ) -> None:
+ """All-reduce metrics across distributed ranks.
+
+ Reduction order: PP collect → reduce_group sum → avg_group avg → DP avg.
+ """
+ if pg_collection is None:
+ pp_group = parallel_state.get_pipeline_model_parallel_group()
+ dp_group = parallel_state.get_data_parallel_group(
+ with_context_parallel=False, partial_data_parallel=False
+ )
+ else:
+ pp_group = pg_collection.pp
+ dp_group = pg_collection.dp
+
+ for name in metric_names:
+ if name not in self._metrics:
+ continue
+
+ entry = self._metrics[name]
+ v = entry.values
+
+ torch.distributed.all_reduce(v, group=pp_group)
+
+ if entry.reduce_group is not None:
+ torch.distributed.all_reduce(v, group=entry.reduce_group)
+
+ if entry.avg_group is not None:
+ torch.distributed.all_reduce(
+ v, group=entry.avg_group, op=torch.distributed.ReduceOp.AVG
+ )
+
+ if entry.needs_dp_avg:
+ torch.distributed.all_reduce(v, group=dp_group, op=torch.distributed.ReduceOp.AVG)
+
+ @staticmethod
+ def _count_moe_layers(
+ num_layers: Optional[int],
+ moe_layer_freq: Optional[Union[int, List[int]]],
+ mtp_num_layers: Optional[int],
+ ) -> int:
+ """Compute the effective number of MoE layers from configuration."""
+ if moe_layer_freq is None:
+ n = num_layers
+ elif isinstance(moe_layer_freq, int):
+ assert isinstance(num_layers, int)
+ n = sum(1 for i in range(num_layers) if i % moe_layer_freq == 0)
+ elif isinstance(moe_layer_freq, list):
+ n = sum(moe_layer_freq)
+ else:
+ raise ValueError(f"Invalid moe_layer_freq: {moe_layer_freq}")
+
+ if mtp_num_layers is not None:
+ n += mtp_num_layers
+
+ return n
+
+ def _aggregate(
+ self,
+ loss_scale: float,
+ num_moe_layers: int,
+ metric_names: List[str],
+ percentiles: Optional[Dict[str, List[float]]] = None,
+ ) -> Dict[str, Union[float, torch.Tensor]]:
+ """Aggregate per-layer values into scalar summaries.
+
+ Always computes the mean across MoE layers. If *percentiles* specifies
+ quantiles for a metric, those are computed over non-zero layer values and
+ added as ``"{name}_p{pct}"`` keys.
+ """
+ result: Dict[str, Union[float, torch.Tensor]] = {}
+
+ for name in metric_names:
+ if name not in self._metrics:
+ continue
+
+ values = self._metrics[name].values.float() * loss_scale
+
+ if percentiles and name in percentiles:
+ nonzero = values[values > 0]
+ if nonzero.numel() > 0:
+ pcts = percentiles[name]
+ pct_vals = torch.quantile(
+ nonzero, torch.tensor(pcts, device=nonzero.device)
+ ).tolist()
+ for pct, pct_val in zip(pcts, pct_vals):
+ result[f"{name}_p{int(pct * 100)}"] = pct_val
+
+ result[name] = values.sum() / num_moe_layers
+
+ return result
+
+ def _log_scalars(
+ self, scalars: Dict[str, Union[float, torch.Tensor]], iteration: int, writer, wandb_writer
+ ) -> None:
+ """Write scalar metrics to TensorBoard and/or W&B."""
+ for name, value in scalars.items():
+ if writer is not None:
+ writer.add_scalar(name, value, iteration)
+ if wandb_writer is not None:
+ wandb_writer.log({name: value}, iteration)
+
+ def _log_per_layer(
+ self,
+ loss_scale: float,
+ metric_names: List[str],
+ iteration: int,
+ writer,
+ wandb_writer,
+ percentiles: Optional[Dict[str, List[float]]] = None,
+ ) -> None:
+ """Write per-layer metric values to TensorBoard and/or W&B."""
+ for name in metric_names:
+ if name not in self._metrics:
+ continue
+
+ values = self._metrics[name].values.float() * loss_scale
+ is_sparse = percentiles is not None and name in percentiles
+ for i, val in enumerate(values.tolist()):
+ if is_sparse and val == 0:
+ continue
+ if writer is not None:
+ writer.add_scalar(f"moe/{name}_layer_{i}", val, iteration)
+ if wandb_writer is not None:
+ wandb_writer.log({f"moe/{name}_layer_{i}": val}, iteration)
+
+ @staticmethod
+ def _format(scalars: Dict[str, Union[float, torch.Tensor]]) -> str:
+ """Format aggregated metrics as a console log string."""
+ return "".join(f" {k}: {v:.2f} |" for k, v in scalars.items())
diff --git a/megatron/core/transformer/moe/moe_utils.py b/megatron/core/transformer/moe/moe_utils.py
index 4c424c74b0b..babba1d904d 100644
--- a/megatron/core/transformer/moe/moe_utils.py
+++ b/megatron/core/transformer/moe/moe_utils.py
@@ -1,5 +1,4 @@
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-
import functools
import math
from dataclasses import dataclass
@@ -20,9 +19,10 @@
from megatron.core.tensor_parallel.mappings import reduce_from_tensor_model_parallel_region
from megatron.core.transformer.cuda_graphs import is_graph_capturing
from megatron.core.transformer.enums import CudaGraphScope
+from megatron.core.transformer.moe.moe_logging import get_moe_metrics_tracker
from megatron.core.transformer.moe.router_replay import RouterReplay
from megatron.core.transformer.transformer_config import TransformerConfig
-from megatron.core.utils import internal_api, is_te_min_version
+from megatron.core.utils import deprecated, internal_api, is_te_min_version
if HAVE_TE:
from megatron.core.extensions.transformer_engine import (
@@ -52,10 +52,6 @@
) = (None, None, None, None, None, None, None, None, None, None)
-# MOE logging
-_MOE_LAYER_WISE_LOGGING_TRACKER: dict = {}
-
-
def switch_load_balancing_loss_func(
probs: torch.Tensor,
tokens_per_expert: torch.Tensor,
@@ -940,6 +936,9 @@ def apply_router_token_dropping(
return final_probs, final_map
+@deprecated(
+ version="0.16", removal_version="0.18", alternative="get_moe_metrics_tracker().record()"
+)
def save_to_aux_losses_tracker(
name: str,
loss: torch.Tensor,
@@ -956,38 +955,36 @@ def save_to_aux_losses_tracker(
layer_number (int): Layer index of the loss.
num_layers (int): The number of total layers.
reduce_group (torch.distributed.ProcessGroup, optional): The group for reducing the loss.
- Defaults to None.
+ Defaults to None.
avg_group (torch.distributed.ProcessGroup, optional): The group for averaging the loss.
- Defaults to None.
- reduce_group_has_dp (bool, optional): Whether the reduce group has data parallel ranks.
- Set this to True if the reduce group has data parallel ranks. This flag is used to
- ensure the correct reduction in aux loss tracking. Defaults to False.
+ Defaults to None.
+ reduce_group_has_dp (bool, optional): Whether the reduce group already includes DP ranks.
+ If True, DP averaging is skipped. Defaults to False.
"""
- # Skip aux loss logging if layer_number is None.
- if layer_number is None:
- return
-
- tracker = get_moe_layer_wise_logging_tracker()
- if name not in tracker:
- tracker[name] = {}
- tracker[name]["values"] = torch.zeros(num_layers, device=loss.device)
- tracker[name]["values"][layer_number - 1] += loss.detach() # Aggregate the loss for the layer.
- tracker[name]["reduce_group"] = reduce_group
- tracker[name]["avg_group"] = avg_group
- tracker[name]["reduce_group_has_dp"] = reduce_group_has_dp
+ get_moe_metrics_tracker().record(
+ name=name,
+ value=loss,
+ layer_number=layer_number,
+ num_layers=num_layers,
+ reduce_group=reduce_group,
+ avg_group=avg_group,
+ needs_dp_avg=not reduce_group_has_dp,
+ )
+@deprecated(version="0.16", removal_version="0.18", alternative="get_moe_metrics_tracker().clear()")
def clear_aux_losses_tracker() -> None:
"""Clear the auxiliary losses."""
- tracker = get_moe_layer_wise_logging_tracker()
- for name in tracker:
- tracker[name]["values"].zero_()
+ get_moe_metrics_tracker().clear()
+@deprecated(
+ version="0.16", removal_version="0.18", alternative="get_moe_metrics_tracker()._sync_metrics()"
+)
def reduce_aux_losses_tracker_across_ranks(
track_names: Optional[List[str]] = None, pg_collection: Optional[ProcessGroupCollection] = None
) -> None:
- """Collect and reduce the auxiliary losses across ranks.
+ """Reduce the auxiliary losses across ranks.
Args:
track_names (Optional[List[str]], optional):
@@ -995,40 +992,28 @@ def reduce_aux_losses_tracker_across_ranks(
pg_collection (Optional[ProcessGroupCollection], optional):
The process group collection. Defaults to None.
"""
- tracker = get_moe_layer_wise_logging_tracker()
- if track_names is None:
- track_names = tracker.keys()
-
- if pg_collection is None:
- # Use parallel_state groups
- pp_group = parallel_state.get_pipeline_model_parallel_group()
- dp_group = parallel_state.get_data_parallel_group(
- with_context_parallel=False, partial_data_parallel=False
- )
- else:
- pp_group = pg_collection.pp
- dp_group = pg_collection.dp
-
- for name in track_names:
- values = tracker[name]["values"]
- # TODO(Hepteract): delete the usage of the global parallel_state.
- # Collect aux losses across PP.
- torch.distributed.all_reduce(values, group=pp_group)
- # Reduce aux losses across ranks.
- if tracker[name].get('reduce_group') is not None:
- torch.distributed.all_reduce(values, group=tracker[name].get('reduce_group'))
- # Need to conduct reduction across data parallel ranks. When the reduce_group
- # does not have 'dp' attribute, do it manually.
- if not tracker[name].get('reduce_group_has_dp', False):
- torch.distributed.all_reduce(
- values, group=dp_group, op=torch.distributed.ReduceOp.AVG
- )
- if tracker[name].get('avg_group') is not None:
- torch.distributed.all_reduce(
- values, group=tracker[name]['avg_group'], op=torch.distributed.ReduceOp.AVG
- )
-
-
+ tracker = get_moe_metrics_tracker()
+ names_list = track_names if track_names is not None else list(tracker.metrics.keys())
+ tracker._sync_metrics(names_list, pg_collection)
+
+
+@deprecated(version="0.16", removal_version="0.18", alternative="get_moe_metrics_tracker().metrics")
+def get_moe_layer_wise_logging_tracker():
+ """Return the moe layer wise tracker in legacy dict format."""
+ return {
+ name: {
+ "values": entry.values,
+ "reduce_group": entry.reduce_group,
+ "avg_group": entry.avg_group,
+ "needs_dp_avg": entry.needs_dp_avg,
+ }
+ for name, entry in get_moe_metrics_tracker().metrics.items()
+ }
+
+
+@deprecated(
+ version="0.15", removal_version="0.17", alternative="get_moe_metrics_tracker().report()"
+)
def track_moe_metrics(
loss_scale: float,
iteration: int,
@@ -1042,95 +1027,25 @@ def track_moe_metrics(
moe_layer_freq: Optional[Union[int, List[int]]] = None,
mtp_num_layers: Optional[int] = None,
pg_collection: Optional[ProcessGroupCollection] = None,
-) -> None:
+) -> str:
"""Track the MoE metrics for logging.
- Args:
- loss_scale (float): The loss scale.
- iteration (int): The iteration.
- writer (SummaryWriter, optional): The tensorboard writer. Defaults to None.
- wandb_writer (wandb.Run, optional): The wandb writer. Defaults to None.
- total_loss_dict (dict[str, torch.Tensor], optional): The total loss dictionary.
- Defaults to None.
- per_layer_logging (bool, optional): Whether to log per layer. Defaults to False.
- force_initialize (bool, optional): Whether to force initialize the tracker.
- Defaults to False.
- track_names (List[str], optional): The names of the losses to track. Defaults to None.
- num_layers (int, optional): The number of layers. Defaults to None.
- moe_layer_freq (Union[int, List[int]], optional): The frequency of the MoE layers.
- Defaults to None.
- mtp_num_layers (int, optional): The number of layers in the model parallel group.
- Defaults to None.
- pg_collection (ProcessGroupCollection, optional): The process group collection.
- Defaults to None.
+ Deprecated: Use get_moe_metrics_tracker().report() directly.
"""
- # Aux loss logging
- tracker = get_moe_layer_wise_logging_tracker()
- # Initialize the tracker if force_initialize is True.
- # The values tensor size must match what the router creates in save_to_aux_losses_tracker,
- # which uses (num_layers + mtp_num_layers). This is important for PP ranks that have no
- # MoE layers (so the tracker is empty and force_initialize creates the entry); their tensor
- # size must match ranks that do have MoE layers, otherwise all_reduce across PP will hang.
- tracker_num_layers = num_layers
- if mtp_num_layers is not None:
- tracker_num_layers += mtp_num_layers
- if force_initialize:
- if track_names is not None:
- for key in track_names:
- if key not in tracker:
- tracker[key] = {}
- tracker[key]["values"] = torch.zeros(tracker_num_layers, device="cuda")
- tracker[key]["reduce_group"] = None
- tracker[key]["avg_group"] = None
- tracker[key]["reduce_group_has_dp"] = False
- reduce_aux_losses_tracker_across_ranks(track_names, pg_collection=pg_collection)
-
- # Get number of MoE layers
- if moe_layer_freq is None:
- num_moe_layers = num_layers
- elif isinstance(moe_layer_freq, int):
- assert isinstance(num_layers, int)
- moe_layer_pattern = [1 if (i % moe_layer_freq == 0) else 0 for i in range(num_layers)]
- num_moe_layers = sum(moe_layer_pattern)
- elif isinstance(moe_layer_freq, list):
- num_moe_layers = sum(moe_layer_freq)
- else:
- raise ValueError(f"Invalid moe_layer_freq: {moe_layer_freq}")
-
- if mtp_num_layers is not None:
- num_moe_layers += mtp_num_layers
-
- aux_losses = {k: v['values'].float() * loss_scale for k, v in tracker.items()}
- for name, loss_list in aux_losses.items():
- if total_loss_dict is not None:
- if name not in total_loss_dict:
- total_loss_dict[name] = loss_list.sum() / num_moe_layers
- else:
- total_loss_dict[name] += loss_list.sum() / num_moe_layers
- if writer is not None:
- # currently when using add_scalars,
- # torch.utils.add_scalars makes each timer its own run, which
- # polutes the runs list, so we just add each as a scalar
- writer.add_scalar(name, loss_list.sum() / num_moe_layers, iteration)
- if per_layer_logging:
- for i, loss in enumerate(loss_list.tolist()):
- writer.add_scalar(f"moe/{name}_layer_{i}", loss, iteration)
-
- # W&B logging lacks support for logging multiple scalars simultaneously.
- # As a workaround, we log each scalar individually first, then we can create
- # a custom panel to manually group them to a single plot.
- if wandb_writer:
- wandb_writer.log({f"{name}": loss_list.sum() / num_moe_layers}, iteration)
- if per_layer_logging:
- wandb_writer.log(
- {
- f"moe/{name}_layer_{i}": loss
- for i, loss in enumerate(loss_list.tolist())
- },
- iteration,
- )
-
- clear_aux_losses_tracker()
+ return get_moe_metrics_tracker().report(
+ loss_scale=loss_scale,
+ iteration=iteration,
+ writer=writer,
+ wandb_writer=wandb_writer,
+ per_layer_logging=per_layer_logging,
+ force_initialize=force_initialize,
+ track_names=track_names,
+ num_layers=num_layers,
+ moe_layer_freq=moe_layer_freq,
+ mtp_num_layers=mtp_num_layers,
+ pg_collection=pg_collection,
+ total_loss_dict=total_loss_dict,
+ )
def get_updated_expert_bias(
@@ -1184,12 +1099,6 @@ def maybe_move_tensor_to_cpu(
return tensor
-def get_moe_layer_wise_logging_tracker() -> dict:
- """Return the moe layer wise tracker."""
- global _MOE_LAYER_WISE_LOGGING_TRACKER
- return _MOE_LAYER_WISE_LOGGING_TRACKER
-
-
@internal_api
class RandomSTE(torch.autograd.Function):
"""
@@ -1402,13 +1311,32 @@ def get_align_size_for_quantization(config: TransformerConfig) -> int:
Returns:
int: The alignment size for quantization.
"""
+ # CUTLASS kernel for grouped GEMM assumes 256 alignment.
+ if config.use_transformer_engine_op_fuser:
+ return 256
if config.fp8:
return get_fp8_align_size(config.fp8_recipe)
- elif config.fp4:
+ if config.fp4:
return get_fp4_align_size(config.fp4_recipe)
return 16
+def skip_routed_expert_padding(config: TransformerConfig) -> bool:
+ """Whether the expert module should skip quantization padding.
+
+ Returns True when padding is already applied by the router or the
+ HybridEP dispatcher.
+ """
+ if config.moe_router_padding_for_quantization:
+ return True
+ if (
+ config.moe_token_dispatcher_type == "flex"
+ and config.moe_flex_dispatcher_backend == "hybridep"
+ ):
+ return True
+ return False
+
+
# TODO(Hepteract): delete the usage of the global parallel_state.
# Initialize process groups with the global parallel_state.
def get_default_pg_collection() -> ProcessGroupCollection:
@@ -1470,12 +1398,7 @@ def get_early_return_outputs(
outputs = [self.kwargs['hidden_states'], self.kwargs['probs']]
valid_cudagraph_attrs = []
for attr_name in self.moe_layer.token_dispatcher.cudagraph_attrs:
- hier_attr_name = attr_name.split('.')
- attr = self.moe_layer.token_dispatcher
- for name in hier_attr_name:
- attr = getattr(attr, name, None)
- if attr is None:
- break
+ attr = self.moe_layer.token_dispatcher.get_cudagraph_attr(attr_name)
if isinstance(attr, torch.Tensor):
outputs.append(attr)
valid_cudagraph_attrs.append(attr_name)
diff --git a/megatron/core/transformer/moe/router.py b/megatron/core/transformer/moe/router.py
index a773775a299..b675d33cd21 100644
--- a/megatron/core/transformer/moe/router.py
+++ b/megatron/core/transformer/moe/router.py
@@ -7,6 +7,7 @@
from megatron.core.jit import jit_fuser
from megatron.core.transformer.module import MegatronModule
+from megatron.core.transformer.moe.moe_logging import get_moe_metrics_tracker
from megatron.core.transformer.moe.moe_utils import (
MoEAuxLossAutoScaler,
ProcessGroupCollection,
@@ -16,7 +17,6 @@
compute_routing_scores_for_aux_loss,
get_tokens_per_expert_and_token_count,
router_gating_linear,
- save_to_aux_losses_tracker,
sinkhorn,
switch_load_balancing_loss_func,
topk_routing_with_score_function,
@@ -312,6 +312,7 @@ def _apply_aux_loss(
moe_aux_loss_coeff=aux_loss_coeff,
fused=self.config.moe_router_fusion,
)
+
probs = self.attach_and_log_load_balancing_loss(
probs,
aux_loss_coeff,
@@ -398,7 +399,6 @@ def _apply_global_aux_loss(
topk=self.topk,
)
)
-
self.global_tokens_per_expert += global_tokens_per_expert
self.ga_steps += 1
averated_tokens_per_expert = self.global_tokens_per_expert / self.ga_steps
@@ -412,13 +412,14 @@ def _apply_global_aux_loss(
moe_aux_loss_coeff=global_aux_loss_coeff,
fused=self.config.moe_router_fusion,
)
+
probs = self.attach_and_log_load_balancing_loss(
probs,
global_aux_loss_coeff,
global_aux_loss,
"global_load_balancing_loss",
self.tp_dp_cp_group,
- reduce_group_has_dp=True,
+ needs_dp_avg=False,
valid_token_count=local_num_tokens,
)
return probs
@@ -430,7 +431,7 @@ def attach_and_log_load_balancing_loss(
aux_loss: torch.Tensor,
aux_loss_name: str,
reduce_group: torch.distributed.ProcessGroup,
- reduce_group_has_dp: bool = False,
+ needs_dp_avg: bool = True,
valid_token_count: Optional[Union[int, torch.Tensor]] = None,
):
"""Attach aux loss function to activation and add to logging.
@@ -441,9 +442,7 @@ def attach_and_log_load_balancing_loss(
aux_loss (torch.Tensor): Computed aux loss.
aux_loss_name (str): Name of the aux loss for logging.
reduce_group (torch.distributed.ProcessGroup): Process group for reduction.
- reduce_group_has_dp (bool): Whether the reduce group has data parallel ranks.
- Set this to True if the reduce group has data parallel ranks. This flag is used to
- ensure the correct reduction in aux loss tracking.
+ needs_dp_avg (bool): Whether to average this metric across DP ranks after reduce_group.
valid_token_count (int or torch.Tensor, optional): Number of valid tokens excluding
padding tokens. Can be a Python int or a torch.Tensor (typically 0-d tensor).
If None, uses activation.shape[0]. Defaults to None.
@@ -471,13 +470,13 @@ def attach_and_log_load_balancing_loss(
else:
layer_number = self.layer_number
- save_to_aux_losses_tracker(
+ get_moe_metrics_tracker().record(
aux_loss_name,
aux_loss / aux_loss_coeff,
layer_number,
num_layers,
reduce_group=reduce_group,
- reduce_group_has_dp=reduce_group_has_dp,
+ needs_dp_avg=needs_dp_avg,
)
if self.calculate_per_token_loss:
# Scale the aux_loss by the number of tokens.
@@ -544,7 +543,7 @@ def apply_z_loss(self, logits, padding_mask: Optional[torch.Tensor] = None):
else:
layer_number = self.layer_number
- save_to_aux_losses_tracker(
+ get_moe_metrics_tracker().record(
"z_loss", z_loss / moe_z_loss_coeff, layer_number, num_layers
)
return logits
diff --git a/megatron/core/transformer/moe/token_dispatcher.py b/megatron/core/transformer/moe/token_dispatcher.py
index 2466ffc0825..3d353666b70 100644
--- a/megatron/core/transformer/moe/token_dispatcher.py
+++ b/megatron/core/transformer/moe/token_dispatcher.py
@@ -37,6 +37,8 @@
from megatron.core.transformer.moe.shared_experts import SharedExpertMLP
from megatron.core.transformer.transformer_config import TransformerConfig
+logger = logging.getLogger(__name__)
+
""" We use the following notation throughout this file:
H: hidden size
B: micro batch size
@@ -47,8 +49,6 @@
num_global_tokens: num_local_tokens*TP*EP
"""
-logger = logging.getLogger(__name__)
-
class MoETokenDispatcher:
"""
@@ -82,6 +82,23 @@ def __init__(
self.cudagraph_attrs = []
self.valid_cudagraph_attrs = None
+ def get_cudagraph_attr(self, attr_name: str):
+ """Resolve a cudagraph attribute path, including nested attributes."""
+ attr = self
+ for name in attr_name.split('.'):
+ attr = getattr(attr, name, None)
+ if attr is None:
+ return None
+ return attr
+
+ def set_cudagraph_attr(self, attr_name: str, value) -> None:
+ """Assign to a cudagraph attribute path, including nested attributes."""
+ hier_attr_name = attr_name.split('.')
+ attr = self
+ for name in hier_attr_name[:-1]:
+ attr = getattr(attr, name)
+ setattr(attr, hier_attr_name[-1], value)
+
@abstractmethod
def dispatch_preprocess(
self, tokens: torch.Tensor, routing_map: torch.Tensor, probs: torch.Tensor
diff --git a/megatron/core/transformer/multi_latent_attention.py b/megatron/core/transformer/multi_latent_attention.py
index 97584fef164..20625423a7f 100644
--- a/megatron/core/transformer/multi_latent_attention.py
+++ b/megatron/core/transformer/multi_latent_attention.py
@@ -258,7 +258,8 @@ def forward(
# Get the query, key and value tensors based on the type of attention -
# self or cross attn.
# query: [96, 1, 16, 128], key:[96, 1, 16, 128], value:[96, 1, 16, 128]
- with off_interface(self.offload_qkv_linear, hidden_states, "qkv_linear") as hidden_states:
+ qkv_linear_manager = off_interface(self.offload_qkv_linear, hidden_states, "qkv_linear")
+ with qkv_linear_manager as hidden_states:
query, key, value, q_compressed, kv_compressed = self.get_query_key_value_tensors(
hidden_states,
key_value_states,
@@ -266,10 +267,7 @@ def forward(
packed_seq_params,
inference_context=inference_context,
)
- if self.offload_qkv_linear:
- query = off_interface.group_commit(
- query, name="qkv_linear", forced_released_tensors=[hidden_states]
- )
+ query = qkv_linear_manager.group_offload(query, forced_released_tensors=[hidden_states])
# ===================================================
# Adjust key, value for inference
@@ -291,6 +289,9 @@ def forward(
# core attention computation
# ==================================
# Need corresponding TE change
+ core_attn_manager = off_interface(
+ self.offload_core_attention and self.training, query, "core_attn"
+ )
if self.checkpoint_core_attention and self.training:
core_attn_out = self._checkpointed_attention_forward(
query, key, value, attention_mask, packed_seq_params=packed_seq_params
@@ -303,9 +304,7 @@ def forward(
# query representation.
extra_kwargs["x"] = hidden_states
extra_kwargs["qr"] = q_compressed
- with off_interface(
- self.offload_core_attention and self.training, query, "core_attn"
- ) as query:
+ with core_attn_manager as query:
core_attn_out = self.core_attention(
query,
key,
@@ -335,10 +334,9 @@ def forward(
# Only rearrange if not in absorption mode (Flash MLA handles format correctly)
if not inference_context.is_decode_only():
core_attn_out = rearrange(core_attn_out, 's b h d -> s b (h d)')
- if self.offload_core_attention and self.training:
- core_attn_out = off_interface.group_commit(
- core_attn_out, name="core_attn", forced_released_tensors=[query, key, value]
- )
+ core_attn_out = core_attn_manager.group_offload(
+ core_attn_out, forced_released_tensors=[query, key, value]
+ )
# We are doing absorption with cache mla latents and decode mode.
if self.cache_mla_latents and inference_context.is_decode_only():
@@ -364,12 +362,10 @@ def forward(
# =================
# Output. [sq, b, h]
# =================
- with off_interface(self.offload_attn_proj, core_attn_out, "attn_proj") as core_attn_out:
+ attn_proj_manager = off_interface(self.offload_attn_proj, core_attn_out, "attn_proj")
+ with attn_proj_manager as core_attn_out:
output, bias = self.linear_proj(core_attn_out)
- if self.offload_attn_proj:
- output = off_interface.group_commit(
- output, name="attn_proj", forced_released_tensors=[core_attn_out]
- )
+ output = attn_proj_manager.group_offload(output, forced_released_tensors=[core_attn_out])
return output, bias
@@ -571,8 +567,8 @@ def get_query_key_value_tensors(
if packed_seq_params is not None:
assert (
packed_seq_params.local_cp_size is None
- ), "hybrid_context_parallel is not supported with MLA yet and is planned for future. \
- Please disable hybrid_context_parallel."
+ ), "dynamic_context_parallel is not supported with MLA yet and is planned for future. \
+ Please disable dynamic_context_parallel."
inference_context = deprecate_inference_params(inference_context, inference_params)
@@ -825,6 +821,7 @@ def qkv_up_proj_and_rope_apply(q_compressed, kv_compressed, k_pos_emb, rotary_po
cu_seqlens=cu_seqlens_q,
mscale=mscale,
cp_group=self.pg_collection.cp,
+ mla_rotary_interleaved=True,
)
# k_pos_emb:[num_tokens, 1, qk_pos_emb_head_dim]
k_pos_emb = apply_rotary_pos_emb(
@@ -834,6 +831,7 @@ def qkv_up_proj_and_rope_apply(q_compressed, kv_compressed, k_pos_emb, rotary_po
cu_seqlens=cu_seqlens_kv,
mscale=mscale,
cp_group=self.pg_collection.cp,
+ mla_rotary_interleaved=True,
)
# query: [num_tokens, n, (qk_head_dim + v_head_dim)]
diff --git a/megatron/core/transformer/multi_token_prediction.py b/megatron/core/transformer/multi_token_prediction.py
index 8fe7a2636b0..244d95995f0 100755
--- a/megatron/core/transformer/multi_token_prediction.py
+++ b/megatron/core/transformer/multi_token_prediction.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
from __future__ import annotations
import warnings
@@ -661,21 +661,33 @@ def process_mtp_loss(
# correctly scaled relative to the main loss gradients in finalize_model_grads.
original_num_tokens = loss_mask.sum()
+ fuse_linear_cross_entropy = (
+ config.cross_entropy_loss_fusion and config.cross_entropy_fusion_impl == "linear"
+ )
for mtp_layer_number in range(config.mtp_num_layers):
- mtp_logits, _ = output_layer(
- hidden_states_list[mtp_layer_number + 1],
- weight=output_weight,
- runtime_gather_output=runtime_gather_output,
- )
- if scale_logits_fn is not None:
- mtp_logits = scale_logits_fn(mtp_logits)
mtp_labels, _ = roll_tensor(
mtp_labels, shifts=-1, dims=-1, cp_group=cp_group, packed_seq_params=packed_seq_params
)
loss_mask, num_tokens = roll_tensor(
loss_mask, shifts=-1, dims=-1, cp_group=cp_group, packed_seq_params=packed_seq_params
)
- mtp_loss = compute_language_model_loss(mtp_labels, mtp_logits)
+ if fuse_linear_cross_entropy:
+ mtp_loss = output_layer(
+ hidden_states_list[mtp_layer_number + 1],
+ weight=output_weight,
+ runtime_gather_output=runtime_gather_output,
+ output_cross_entropy_loss=True,
+ labels=mtp_labels,
+ )
+ else:
+ mtp_logits, _ = output_layer(
+ hidden_states_list[mtp_layer_number + 1],
+ weight=output_weight,
+ runtime_gather_output=runtime_gather_output,
+ )
+ if scale_logits_fn is not None:
+ mtp_logits = scale_logits_fn(mtp_logits)
+ mtp_loss = compute_language_model_loss(mtp_labels, mtp_logits)
mtp_loss = loss_mask * mtp_loss
if is_training:
mtp_loss_for_log = (
diff --git a/megatron/core/transformer/transformer_block.py b/megatron/core/transformer/transformer_block.py
index 8bea3b8c94e..89d0b2cb75e 100755
--- a/megatron/core/transformer/transformer_block.py
+++ b/megatron/core/transformer/transformer_block.py
@@ -1,8 +1,9 @@
-# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
import logging
from contextlib import nullcontext
from dataclasses import dataclass
-from typing import List, Optional, Set, Union, cast
+from typing import List, Optional, Set, Tuple, Union, cast
import torch
from torch import Tensor
@@ -19,7 +20,9 @@
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.pipeline_parallel.utils import is_vp_first_stage, is_vp_last_stage
from megatron.core.process_groups_config import ProcessGroupCollection
+from megatron.core.tensor_parallel.random import CheckpointManager
from megatron.core.transformer.enums import CudaGraphScope, LayerType
+from megatron.core.transformer.hyper_connection import HyperConnectionModule
from megatron.core.transformer.module import GraphableMegatronModule, MegatronModule
from megatron.core.transformer.spec_utils import ModuleSpec, build_module
from megatron.core.transformer.torch_norm import LayerNormBuilder
@@ -319,6 +322,7 @@ def __init__(
self.offload_context, self.group_prefetch_offload_commit_async = nullcontext(), None
self.config._cpu_offloading_context = None
+ self.num_residual_streams = config.num_residual_streams
self._build_layers()
self.num_layers_per_pipeline_rank = len(self.layers)
@@ -386,7 +390,6 @@ def build_layer(layer_spec, layer_number):
def has_final_layernorm_in_this_stage(self):
"""
Check if this vpp stage contains the final layernorm.
-
Note:
Final layernorm now has been moved from the post-process stage to the last decoder
layer by using this function.
@@ -642,6 +645,46 @@ def __call__(self, *args, **kwargs):
return super().__call__(*args, **kwargs)[0]
return super().__call__(*args, **kwargs)
+ def _build_mhc_recompute_layer_plan(
+ self, use_mhc_recompute: bool
+ ) -> Tuple[List[Optional[CheckpointManager]], List[bool]]:
+ """Pre-build per-layer MHC recompute managers and block-end markers."""
+ num_layers = len(self.layers)
+ layer_managers: List[Optional[CheckpointManager]] = [None] * num_layers
+ is_recompute_block_end: List[bool] = [False] * num_layers
+
+ if not use_mhc_recompute or num_layers == 0:
+ return layer_managers, is_recompute_block_end
+
+ mhc_recompute_layer_num = self.config.mhc_recompute_layer_num
+ mhc_manager = CheckpointManager()
+
+ for l_no in range(num_layers):
+ is_last_in_transformer_block = l_no == num_layers - 1
+ is_last_in_recompute_block = is_last_in_transformer_block
+ if mhc_recompute_layer_num is not None:
+ is_last_in_recompute_block = is_last_in_transformer_block or (
+ (l_no + 1) % mhc_recompute_layer_num == 0
+ )
+
+ layer_managers[l_no] = mhc_manager
+ is_recompute_block_end[l_no] = is_last_in_recompute_block
+
+ if is_last_in_recompute_block and not is_last_in_transformer_block:
+ mhc_manager = CheckpointManager()
+
+ return layer_managers, is_recompute_block_end
+
+ @staticmethod
+ def _finalize_mhc_recompute_layer(
+ mhc_manager: Optional[CheckpointManager],
+ hidden_states: Tensor,
+ is_last_in_recompute_block: bool,
+ ) -> None:
+ """Finalize MHC recompute state for the current layer when block ends."""
+ if mhc_manager is not None and is_last_in_recompute_block:
+ mhc_manager.discard_all_outputs_and_register_unified_recompute(hidden_states)
+
def forward(
self,
hidden_states: Union[Tensor, WrappedTensor],
@@ -751,6 +794,13 @@ def forward(
# is called here to be future-proof and corner-case-proof.
hidden_states = make_viewless_tensor(inp=hidden_states, requires_grad=True, keep_graph=True)
+ # Expand hidden states for hyper connections at the start of the block
+ # Only expand at the first PP stage; subsequent stages receive n-stream from previous stage
+ if self.config.enable_hyper_connections and self.pre_process:
+ hidden_states = HyperConnectionModule.input_expand(
+ hidden_states, self.num_residual_streams
+ ) # [s, b, C] -> [s, b, n*C]
+
if self.config.sequence_parallel:
rng_context = tensor_parallel.get_cuda_rng_tracker().fork()
else:
@@ -778,6 +828,18 @@ def forward(
use_inner_quantization_context = False
outer_quantization_context = nullcontext()
+ # Determine if MHC recompute should be used
+ # Only enable when: training mode AND hyper connections AND 'mhc' in recompute_modules
+ use_mhc_recompute = (
+ self.training
+ and self.config.enable_hyper_connections
+ and self.config.recompute_granularity == 'selective'
+ and "mhc" in self.config.recompute_modules
+ )
+ mhc_layer_managers, mhc_is_last_in_recompute_block = self._build_mhc_recompute_layer_plan(
+ use_mhc_recompute
+ )
+
with rng_context, outer_quantization_context:
# Forward pass.
if self.config.recompute_granularity == 'full' and self.training:
@@ -818,6 +880,12 @@ def forward(
else:
inner_quantization_context = nullcontext()
+ mhc_manager = mhc_layer_managers[l_no]
+ if mhc_manager is not None:
+ mhc_manager.is_last_layer_in_recompute_block = (
+ mhc_is_last_in_recompute_block[l_no]
+ )
+
with self.offload_context, inner_quantization_context:
hidden_states, context = layer(
hidden_states=hidden_states,
@@ -833,7 +901,13 @@ def forward(
packed_seq_params=packed_seq_params,
sequence_len_offset=sequence_len_offset,
padding_mask=padding_mask,
+ mhc_recompute_manager=mhc_manager,
)
+ self._finalize_mhc_recompute_layer(
+ mhc_manager=mhc_manager,
+ hidden_states=hidden_states,
+ is_last_in_recompute_block=mhc_is_last_in_recompute_block[l_no],
+ )
if (
torch.is_grad_enabled()
@@ -846,6 +920,12 @@ def forward(
if (l_no + layer_offset) in extract_layer_indices:
intermediate_hidden_states.append(hidden_states)
+ # Only contract if the final layer norm is in this stage
+ if self.config.enable_hyper_connections and self.has_final_layernorm_in_this_stage():
+ hidden_states = HyperConnectionModule.output_contract(
+ hidden_states, self.num_residual_streams
+ ) # [s, b, n*C] -> [s, b, C]
+
# Final layer norm.
if self.final_layernorm is not None:
hidden_states = apply_module(self.final_layernorm)(cast(Tensor, hidden_states))
diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py
index 99f0093a521..e1c71761f1f 100644
--- a/megatron/core/transformer/transformer_config.py
+++ b/megatron/core/transformer/transformer_config.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
import logging
import math
@@ -13,6 +13,7 @@
from megatron.core.quantization.quant_config import RecipeConfig
from megatron.core.transformer.enums import AttnBackend, CudaGraphScope
from megatron.core.transformer.pipeline_parallel_layer_layout import PipelineParallelLayerLayout
+from megatron.core.utils import experimental_api
from .._rank_utils import log_single_rank
from ..fusions.fused_bias_geglu import quick_gelu
@@ -37,6 +38,7 @@
@dataclass
+@experimental_api
class TransformerConfig(ModelParallelConfig):
"""Configuration object for megatron-core transformers.
@@ -287,6 +289,9 @@ class TransformerConfig(ModelParallelConfig):
####################
# linear attention
####################
+ linear_attention_type: Optional[str] = None
+ """Type of linear attention to use.
+ Deprecated. Use experimental_attention_variant instead."""
linear_attention_freq: Optional[Union[int, List[int]]] = None
"""Frequency between LA (linear attention) layers
and SDPA (scaled dot-product attention) layers.
@@ -446,6 +451,10 @@ class TransformerConfig(ModelParallelConfig):
fused_single_qkv_rope: bool = False
"""If set, avoid splitting QKV before ROPE forward and avoid concatenating ROPE dgrads."""
+ use_transformer_engine_op_fuser: bool = False
+ """If True, submodules may use Transformer Engine's operation fuser
+ API to enable advanced fusions."""
+
fused_residual_rmsnorm: bool = False
"""If True, fuses residual connection and RMSNorm backward pass when TE is used."""
@@ -482,7 +491,8 @@ class TransformerConfig(ModelParallelConfig):
recompute_modules: Optional[List[str]] = None
"""The submodules to recompute.
- choices: "core_attn", "moe_act", "layernorm", "mla_up_proj", "mlp", "moe", "shared_experts".
+ choices: "core_attn", "moe_act", "layernorm", "mla_up_proj", "mlp", "moe",
+ "shared_experts", "mhc".
default: ["core_attn"].
"core_attn": recompute the core attention part of the transformer layer.
"moe_act": recompute the MoE MLP activation function.
@@ -491,7 +501,10 @@ class TransformerConfig(ModelParallelConfig):
"mlp": recompute the dense MLP submodule.
"moe": recompute the MoE layer.
"shared_experts": recompute the shared experts in the MoE layer.
- "moe_act", "layernorm", and "mla_up_proj" use output-discarding checkpointing,
+ "mhc": recompute HyperConnection intermediate activations via
+ CheckpointWithoutOutput + CheckpointManager. Requires
+ enable_hyper_connections=True. Cannot be used with "mlp".
+ "moe_act", "layernorm", "mla_up_proj", and "mhc" use output-discarding checkpointing,
"core_attn", "mlp", "moe", and "shared_experts" use normal checkpointing.
"""
@@ -616,7 +629,7 @@ class TransformerConfig(ModelParallelConfig):
in the hidden_states gradient."""
moe_shared_expert_gate: bool = False
- """Enable gate for shared expert. Only effective when
+ """Enable gate for shared expert. Only effective when
moe-shared-expert-intermediate-size is set."""
moe_shared_expert_overlap: bool = False
@@ -807,6 +820,15 @@ class TransformerConfig(ModelParallelConfig):
"""Number of SMs to use for HybridEP. In pure NVL scenarios,
16 SMs can generally achieve good bandwidth."""
+ moe_mlp_glu_interleave_size: Optional[int] = None
+ """When set, GLU activations in the MoE grouped MLP layer will use a
+ block interleaved format. Instead of interpreting the input tensor
+ as a concatenation of gates and linear units, it will be
+ interpreted as alternating blocks of gates and linear units.
+
+ This data format is experimental and primarily intended to enable
+ advanced fused kernels."""
+
##################
# Context Parallel
##################
@@ -871,6 +893,45 @@ class TransformerConfig(ModelParallelConfig):
When cuda_graph_impl is set to "local", "full_iteration" can be specified as cuda_graph_scope
to enable whole iteration CUDA graph. All other values enable layerwise CUDA graph."""
+ ####################
+ # Hyper-Connection Configuration
+ ####################
+ enable_hyper_connections: bool = False
+ """Enable mHC residual connections."""
+
+ num_residual_streams: int = 4
+ """Number of residual streams (n in paper)."""
+
+ mhc_sinkhorn_iterations: int = 20
+ """Number of Sinkhorn-Knopp iterations for doubly stochastic projection."""
+
+ mhc_init_gating_factor: float = 0.01
+ """Initial value of Gating Factor (alpha in paper)."""
+
+ use_fused_mhc: bool = False
+ """Use cuTile fused kernels for mHC operations.
+
+ When True, attempts to replace the reference mHC modules (SinkhornKnopp,
+ H_aggregate, H_post_bda, ProjRms) with fused cuda.tile (cuTile) autograd
+ functions for better performance on supported GPUs. Requires cuTile to be
+ installed; if cuTile is unavailable the flag is silently reset to False and
+ a warning is emitted.
+ """
+
+ mhc_recompute_layer_num: Optional[int] = None
+ """Number of layers per MHC recompute block.
+
+ When set, every `mhc_recompute_layer_num` layers form a recompute block. The last layer
+ in each recompute block (i.e., layer_number % mhc_recompute_layer_num == 0 or the final
+ layer in the transformer block) will:
+ - NOT checkpoint its final MLP BDA
+ - Register the unified recompute hook on its MLP BDA output
+ - A new CheckpointManager is created for subsequent layers
+
+ If None, all layers in the transformer block share a single recompute block.
+
+ Must be a positive integer when set."""
+
####################
# miscellaneous
####################
@@ -988,6 +1049,9 @@ class TransformerConfig(ModelParallelConfig):
"""Transformer implementation to use.
Options are 'transformer_engine' for Transformer Engine and 'local' for MCore."""
+ fallback_to_eager_attn: bool = False
+ """Whether to fallback to eager attention in TE implementation.
+ Suggested for when desired features are not available in TE implementation."""
#####################################
# Fine-grained Activation Offloading
#####################################
@@ -1011,6 +1075,21 @@ class TransformerConfig(ModelParallelConfig):
min_offloaded_tensor_size: int = 1024 * 1024
"""The minimum size of the tensor to be offloaded."""
+ delay_offload_until_cuda_graph: bool = False
+ """If True, delay the offload until the CUDA graph is executed for minimal CPU overhead.
+ For more details, see the documentation:
+ https://github.com/NVIDIA/Megatron-LM/blob/main/docs/user-guide/features/fine_grained_activation_offloading.md#cuda-graph-integration.
+ """
+
+ delta_offload_bytes_across_pp_ranks: int = 0
+ """Difference of offload bytes across PP ranks to balance the offload load.
+ For more details, see the documentation:
+ https://github.com/NVIDIA/Megatron-LM/blob/main/docs/user-guide/features/fine_grained_activation_offloading.md#tuning-parameters.
+ """
+
+ activation_offload_fraction: float = 1.0
+ """The fraction of the activation to be offloaded, which should be in range [0, 1]."""
+
def __post_init__(self):
"""Python dataclass method that is used to modify attributes after initialization.
See https://docs.python.org/3/library/dataclasses.html#post-init-processing for more
@@ -1064,45 +1143,56 @@ def __post_init__(self):
f"tensor_model_parallel_size ({self.tensor_model_parallel_size})."
)
- if self.experimental_attention_variant == "gated_delta_net":
+ if self.linear_attention_type is not None:
+ warnings.warn(
+ "linear_attention_type is deprecated, "
+ "use experimental_attention_variant instead."
+ )
+ self.experimental_attention_variant = self.linear_attention_type
+ self.linear_attention_type = None
+
+ if self.experimental_attention_variant in ["gated_delta_net"]:
assert (
self.linear_attention_freq is not None
- ), f"linear_attention_freq must be set for linear gated_delta_net."
+ ), f"linear_attention_freq must be set for linear attention."
- # Check required parameters
- assert (
- self.linear_conv_kernel_dim is not None
- ), "linear_conv_kernel_dim must be set for gated delta net."
- assert (
- self.linear_key_head_dim is not None
- ), "linear_key_head_dim must be set for gated delta net."
- assert (
- self.linear_value_head_dim is not None
- ), "linear_value_head_dim must be set for gated delta net."
- assert (
- self.linear_num_key_heads is not None
- ), "linear_num_key_heads must be set for gated delta net."
- assert (
- self.linear_num_value_heads is not None
- ), "linear_num_value_heads must be set for gated delta net."
- assert self.linear_num_value_heads % self.linear_num_key_heads == 0, (
- f"linear_num_value_heads ({self.linear_num_value_heads}) must be a multiple of "
- f"linear_num_key_heads ({self.linear_num_key_heads})."
- )
+ if self.experimental_attention_variant == "gated_delta_net":
+ # Check required parameters
+ assert (
+ self.linear_conv_kernel_dim is not None
+ ), "linear_conv_kernel_dim must be set for gated delta net."
+ assert (
+ self.linear_key_head_dim is not None
+ ), "linear_key_head_dim must be set for gated delta net."
+ assert (
+ self.linear_value_head_dim is not None
+ ), "linear_value_head_dim must be set for gated delta net."
+ assert (
+ self.linear_num_key_heads is not None
+ ), "linear_num_key_heads must be set for gated delta net."
+ assert (
+ self.linear_num_value_heads is not None
+ ), "linear_num_value_heads must be set for gated delta net."
+ assert self.linear_num_value_heads % self.linear_num_key_heads == 0, (
+ f"linear_num_value_heads ({self.linear_num_value_heads}) must be a multiple of "
+ f"linear_num_key_heads ({self.linear_num_key_heads})."
+ )
- # Check tensor parallelism compatibility
- assert (
- self.linear_num_key_heads % self.tensor_model_parallel_size == 0
- ), "linear_num_key_heads must be a multiple of tensor_model_parallel_size."
+ # Check tensor parallelism compatibility
+ tp_cp_size = self.tensor_model_parallel_size * self.context_parallel_size
+ assert self.linear_num_key_heads % tp_cp_size == 0, (
+ f"{self.linear_num_key_heads=} must be a multiple of "
+ f"({self.tensor_model_parallel_size=} * {self.context_parallel_size=})."
+ )
+ assert self.linear_num_value_heads % tp_cp_size == 0, (
+ f"{self.linear_num_value_heads=} must be a multiple of "
+ f"({self.tensor_model_parallel_size=} * {self.context_parallel_size=})."
+ )
+ elif self.experimental_attention_variant == "dsa":
assert (
- self.linear_num_value_heads % self.tensor_model_parallel_size == 0
- ), "linear_num_value_heads must be a multiple of tensor_model_parallel_size."
-
- # Do not support yet, but coming soon.
- assert self.context_parallel_size == 1, (
- f"Gated delta net does not support context parallel for now,"
- f" but got {self.context_parallel_size=}."
- )
+ self.context_parallel_size == 1
+ ), "Currently context parallelism is not supported by DSAttention!"
+ assert not self.apply_rope_fusion, "RoPE fusion is not supported for DSAttention"
if self.fp8:
# cannot support first last layer bf16 with delayed scaling
@@ -1362,6 +1452,7 @@ def __post_init__(self):
"mlp",
"moe",
"shared_experts",
+ "mhc",
}
invalid_modules = set(self.recompute_modules) - allowed_modules
assert not invalid_modules, (
@@ -1424,6 +1515,72 @@ def __post_init__(self):
if "moe" not in self.recompute_modules:
self.recompute_modules.append("moe")
+ # Validation for "mhc" in recompute_modules
+ if self.recompute_granularity == "selective" and "mhc" in self.recompute_modules:
+ if not self.enable_hyper_connections:
+ raise ValueError(
+ "'mhc' in recompute_modules requires enable_hyper_connections=True."
+ )
+ if "mlp" in self.recompute_modules:
+ raise ValueError(
+ "'mhc' and 'mlp' in recompute_modules cannot be used together. "
+ "They use different checkpoint mechanisms that may conflict."
+ )
+ if self.mhc_recompute_layer_num is not None and (
+ isinstance(self.mhc_recompute_layer_num, bool)
+ or not isinstance(self.mhc_recompute_layer_num, int)
+ or self.mhc_recompute_layer_num < 1
+ ):
+ raise ValueError(
+ "mhc_recompute_layer_num must be a positive integer when "
+ "'mhc' is in recompute_modules."
+ )
+ if self.fine_grained_activation_offloading:
+ raise ValueError(
+ "'mhc' in recompute_modules is incompatible with "
+ "fine_grained_activation_offloading. The mHC recompute hook fires "
+ "before the offloading backward chunk is initialized, causing "
+ "tensor_pop on a None chunk. Disable one of them."
+ )
+
+ if self.enable_hyper_connections and not (
+ self.recompute_granularity == "selective" and "mhc" in self.recompute_modules
+ ):
+ warnings.warn(
+ "HyperConnections are enabled but 'mhc' is not in "
+ "recompute_modules with selective recompute. Consider adding 'mhc' to "
+ "recompute_modules with selective recompute to reduce activation memory."
+ )
+
+ # Validation for use_fused_mhc
+ if self.use_fused_mhc:
+ if not self.enable_hyper_connections:
+ raise ValueError("use_fused_mhc requires enable_hyper_connections=True.")
+ try:
+ from megatron.core.fusions.fused_mhc_kernels import is_cutile_available
+
+ if not is_cutile_available():
+ warnings.warn(
+ "use_fused_mhc is enabled but cuda.tile (cuTile) is not installed. "
+ "Falling back to reference mHC implementations.",
+ UserWarning,
+ )
+ self.use_fused_mhc = False
+ except ImportError:
+ warnings.warn(
+ "use_fused_mhc is enabled but fused_mhc_kernels module could not be "
+ "imported. Falling back to reference mHC implementations.",
+ UserWarning,
+ )
+ self.use_fused_mhc = False
+
+ # Validation for hyper_connections with MTP
+ if self.enable_hyper_connections and self.mtp_num_layers is not None:
+ raise ValueError(
+ "enable_hyper_connections is not compatible with Multi-Token Prediction (MTP). "
+ "Please disable MTP (set mtp_num_layers=None) when using hyper connections."
+ )
+
if self.fine_grained_activation_offloading:
assert (
not self.cpu_offloading
@@ -1449,6 +1606,24 @@ def __post_init__(self):
"because the input of attn_proj is the output of core_attn, "
"which is needed in core_attn.backward()."
)
+ if self.recompute_granularity == "selective" and "moe" in self.recompute_modules:
+ offload_inside_moe = {"moe_act", "expert_fc1"} & set(self.offload_modules)
+ assert not offload_inside_moe, (
+ f"Cannot offload {offload_inside_moe} while recomputing the entire MoE layer. "
+ f"'moe' in recompute_modules wraps the full MoE forward in a checkpoint, "
+ f"so offloading activations inside it is redundant and will cause errors. "
+ f"Either remove 'moe' from --recompute-modules or remove "
+ f"{offload_inside_moe} from --offload-modules."
+ )
+ assert (
+ self.min_offloaded_tensor_size >= 0
+ ), "min_offloaded_tensor_size must be non-negative."
+ assert (
+ self.activation_offload_fraction >= 0 and self.activation_offload_fraction <= 1
+ ), "activation_offload_fraction must be in range [0, 1]."
+ assert (
+ self.delta_offload_bytes_across_pp_ranks >= 0
+ ), "delta_offload_bytes_across_pp_ranks must be non-negative."
if (
self.num_layers_in_first_pipeline_stage is not None
@@ -2045,6 +2220,18 @@ def __post_init__(self):
"moe_input_jitter_eps is not supported with graphed moe recomputation."
)
+ if self.fine_grained_activation_offloading:
+ assert (
+ self.cuda_graph_impl == "transformer_engine"
+ ), "fine_grained_activation_offloading must be used with TE impl of cuda_graph."
+ assert (
+ CudaGraphScope.moe not in self.cuda_graph_scope
+ ), "Token-drop MoE is temporarily not supported with activation offloading."
+ assert self.cuda_graph_warmup_steps > 0, (
+ "cuda_graph_warmup_steps must be greater than 0 when enabling "
+ "fine-grained activation offloading."
+ )
+
if self.moe_token_dispatcher_type in ["allgather"]:
if self.variable_seq_lengths is True:
raise ValueError(
@@ -2140,6 +2327,19 @@ def __post_init__(self):
'partial cuda graph'
)
+ if self.overlap_dispatch_backward_with_experts_wgrad:
+ assert not self.overlap_moe_expert_parallel_comm, (
+ 'overlap_moe_expert_parallel_comm must be disabled when enabling '
+ 'overlap_dispatch_backward_with_experts_wgrad.'
+ )
+ assert is_te_min_version(
+ "2.3.0"
+ ), 'TE version >= 2.3.0 is required for overlap_dispatch_backward_with_experts_wgrad'
+ assert not self.delay_wgrad_compute, (
+ 'delay_wgrad_compute and overlap_dispatch_backward_with_experts_wgrad '
+ 'are mutually exclusive; use only one'
+ )
+
if self.ep_overlap_early_attn_memory_release:
assert self.overlap_moe_expert_parallel_comm, (
'overlap_moe_expert_parallel_comm must be enabled when enabling '
@@ -2198,6 +2398,25 @@ def __post_init__(self):
f"the number of layers ({self.num_layers})"
)
+ if self.fallback_to_eager_attn:
+ assert self.transformer_impl == "transformer_engine", (
+ f"fallback_to_eager_attn is only available with transformer_engine implementation,"
+ f" but got {self.transformer_impl=}."
+ )
+
+ if self.fallback_to_eager_attn or self.transformer_impl == "local":
+ if self.context_parallel_size > 1 and self.cp_comm_type is not None:
+ all_cp_comm_types_are_all_gather = (
+ all(item == "all_gather" for item in self.cp_comm_type)
+ if isinstance(self.cp_comm_type, list)
+ else self.cp_comm_type == "all_gather"
+ )
+ if not all_cp_comm_types_are_all_gather:
+ raise ValueError(
+ f"fallback_to_eager_attn only supports all_gather communication type "
+ f"for context parallelism, but got {self.cp_comm_type=} instead."
+ )
+
if self.transformer_impl == "inference_optimized":
assert self.normalization == "RMSNorm"
assert not self.layernorm_zero_centered_gamma
@@ -2205,12 +2424,6 @@ def __post_init__(self):
assert not self.add_qkv_bias
assert not self.use_kitchen
- if self.experimental_attention_variant == "dsa":
- assert (
- self.context_parallel_size == 1
- ), "Currently context parallelism is not supported by DSAttention!"
- assert not self.apply_rope_fusion, "RoPE fusion is not supported for DSAttention"
-
if self.inference_fuse_tp_communication:
assert self.transformer_impl == "inference_optimized", (
"inference_fuse_tp_communication is only supported "
@@ -2231,8 +2444,43 @@ def __post_init__(self):
self.attention_backend == AttnBackend.flash
), "Batch invariant mode only supports FlashAttention"
+ if self.sequence_packing_scheduler is not None:
+ # Check TE version.
+ if not HAVE_PACKAGING:
+ raise ImportError(
+ "packaging is not installed. Please install it with `pip install packaging`."
+ )
+ # TODO: remove this after we fix the convergence issue with TE < 2.9.
+ if not (
+ is_te_min_version("2.9.0") or get_te_version() == PkgVersion("2.9.0.dev0+5b3092a")
+ ):
+ raise ValueError(
+ "SFT sequence packing requires Transformer Engine >= 2.9.0 "
+ f"but got {get_te_version()} (TE < 2.9.0 may have convergence issues)."
+ )
+
+ # Needed for passing variable sequences between pp stages.
+ self.variable_seq_lengths = True
+
+ # TODO(tailaim): add support for other dispatcher types
+ assert self.moe_token_dispatcher_type == "alltoall", (
+ f"sequence_packing only supports moe_token_dispatcher_type='alltoall', "
+ f"got '{self.moe_token_dispatcher_type}'"
+ )
+
+ supported_schedulers = ['dp_balanced']
+ if (
+ self.sequence_packing_scheduler is not None
+ and self.sequence_packing_scheduler not in supported_schedulers
+ ):
+ raise ValueError(
+ f"Unsupported scheduler: {self.sequence_packing_scheduler}. "
+ f"Available schedulers: {supported_schedulers}"
+ )
+
@dataclass
+@experimental_api
class MLATransformerConfig(TransformerConfig):
"""Configuration object for megatron-core Multi-Latent Attention (MLA) transformers.
diff --git a/megatron/core/transformer/transformer_layer.py b/megatron/core/transformer/transformer_layer.py
index cf63199347c..2619bcd1338 100644
--- a/megatron/core/transformer/transformer_layer.py
+++ b/megatron/core/transformer/transformer_layer.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
from __future__ import annotations
import functools
@@ -8,6 +8,9 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any, Dict, Optional, Union
+if TYPE_CHECKING:
+ from megatron.core.tensor_parallel.random import CheckpointManager
+
import torch
import torch.distributed
from torch import Tensor
@@ -30,6 +33,7 @@
deprecate_inference_params,
get_pg_rank,
is_te_min_version,
+ is_torch_min_version,
log_single_rank,
make_viewless_tensor,
nvtx_range_pop,
@@ -42,6 +46,16 @@
logger = logging.getLogger(__name__)
+@functools.lru_cache(maxsize=None)
+def _get_offloading_interface():
+ """Get the offloading interface for fine-grained activation offloading."""
+ from megatron.core.pipeline_parallel.fine_grained_activation_offload import (
+ FineGrainedActivationOffloadingInterface,
+ )
+
+ return FineGrainedActivationOffloadingInterface
+
+
def get_transformer_layer_offset(
config: TransformerConfig, vp_stage: Optional[int] = None, pp_rank: Optional[int] = None
):
@@ -228,14 +242,17 @@ class TransformerLayerSubmodules:
"""
input_layernorm: LayerNormBuilder = IdentityOp
+ self_attention_hyper_connection: Union[ModuleSpec, type] = IdentityOp
self_attention: Union[ModuleSpec, type] = IdentityOp
self_attn_bda: Union[ModuleSpec, type] = IdentityFuncOp
pre_cross_attn_layernorm: LayerNormBuilder = IdentityOp
+ cross_attention_hyper_connection: Union[ModuleSpec, type] = IdentityOp
cross_attention: Union[ModuleSpec, type] = IdentityOp
cross_attn_bda: Union[ModuleSpec, type] = IdentityFuncOp
pre_mlp_layernorm: LayerNormBuilder = IdentityOp
+ mlp_hyper_connection: Union[ModuleSpec, type] = IdentityOp
mlp: Union[ModuleSpec, type] = IdentityOp
mlp_bda: Union[ModuleSpec, type] = IdentityFuncOp
@@ -471,17 +488,10 @@ def can_recompute_pre_mlp_layernorm_for_cudagraph():
if "mlp" in self.config.recompute_modules:
if not self.is_moe_layer:
self.recompute_mlp = True
- self.offload_attn_norm = (
- self.config.fine_grained_activation_offloading
- and "attn_norm" in self.config.offload_modules
- and not isinstance(self.input_layernorm, IdentityOp)
- )
- self.offload_mlp_norm = (
- self.config.fine_grained_activation_offloading
- and "mlp_norm" in self.config.offload_modules
- and not isinstance(self.pre_mlp_layernorm, IdentityOp)
- )
+ self._set_offload_modules()
+ self.off_interface = _get_offloading_interface()
+ self.mlp_norm_manager = None
# @jcasper how should we handle nvfuser?
# Set bias+dropout+add fusion grad_enable execution handler.
# TORCH_MAJOR = int(torch.__version__.split('.')[0])
@@ -570,21 +580,18 @@ def _forward_attention(
context (Tensor): Updated context tensor if cross-attention is used,
otherwise None.
"""
- from megatron.core.pipeline_parallel.fine_grained_activation_offload import (
- FineGrainedActivationOffloadingInterface as off_interface,
- )
-
inference_context = deprecate_inference_params(inference_context, inference_params)
# Optional Input Layer norm
+ attn_norm_manager = self.off_interface(self.offload_attn_norm, hidden_states, "attn_norm")
if self.recompute_input_layernorm:
self.input_layernorm_checkpoint = tensor_parallel.CheckpointWithoutOutput()
- with off_interface(self.offload_attn_norm, hidden_states, "attn_norm") as hidden_states:
+ with attn_norm_manager as hidden_states:
input_layernorm_output = self.input_layernorm_checkpoint.checkpoint(
apply_module(self.input_layernorm), hidden_states
)
else:
- with off_interface(self.offload_attn_norm, hidden_states, "attn_norm") as hidden_states:
+ with attn_norm_manager as hidden_states:
input_layernorm_output = apply_module(self.input_layernorm)(hidden_states)
if isinstance(input_layernorm_output, tuple):
@@ -606,8 +613,6 @@ def _forward_attention(
)
if using_fused_tp_inference_kernel:
- # Set the residual for fused reduce-scatter + add + layer-norm + all-gather
- # operation in attention's out_proj (linear_proj)
self._set_proj_residual(residual)
# Self attention.
@@ -650,10 +655,9 @@ def _forward_attention(
# Delay the offload of the attention norm until after the self_attn_bda has been computed
# because the residual is needed in the self_attn_bda.
- if self.offload_attn_norm:
- hidden_states = off_interface.group_commit(
- hidden_states, name="attn_norm", forced_released_tensors=[residual]
- )
+ hidden_states = attn_norm_manager.group_offload(
+ hidden_states, forced_released_tensors=[residual]
+ )
# Optional Layer norm after self-attention
pre_cross_attn_layernorm_output = apply_module(self.pre_cross_attn_layernorm)(hidden_states)
@@ -700,6 +704,11 @@ def forward(self, *args, **kwargs):
This method calls the core computation of a transformer layer, including
self-attention, cross-attention (if applicable), and feed-forward operations.
"""
+ # Injected by __call__ for cuda graph keying; not a real forward arg.
+ kwargs.pop("dynamic_inference_decode_only", None)
+ assert (
+ not self.config.enable_hyper_connections
+ ), "Please use HyperConnectionTransformerLayer instead"
hidden_states, context = self._forward_attention(*args, **kwargs)
output = self._forward_mlp(
hidden_states,
@@ -709,18 +718,15 @@ def forward(self, *args, **kwargs):
return output, context
def _forward_pre_mlp_layernorm(self, hidden_states: Tensor):
- from megatron.core.pipeline_parallel.fine_grained_activation_offload import (
- FineGrainedActivationOffloadingInterface as off_interface,
- )
-
+ self.mlp_norm_manager = self.off_interface(self.offload_mlp_norm, hidden_states, "mlp_norm")
if self.recompute_pre_mlp_layernorm:
self.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput()
- with off_interface(self.offload_mlp_norm, hidden_states, "mlp_norm") as hidden_states:
+ with self.mlp_norm_manager as hidden_states:
pre_mlp_layernorm_output = self.pre_mlp_norm_checkpoint.checkpoint(
apply_module(self.pre_mlp_layernorm), hidden_states
)
else:
- with off_interface(self.offload_mlp_norm, hidden_states, "mlp_norm") as hidden_states:
+ with self.mlp_norm_manager as hidden_states:
pre_mlp_layernorm_output = apply_module(self.pre_mlp_layernorm)(hidden_states)
return pre_mlp_layernorm_output
@@ -850,9 +856,6 @@ def _forward_post_mlp(
Returns:
output (Tensor): Transformed hidden states of shape [s, b, h].
"""
- from megatron.core.pipeline_parallel.fine_grained_activation_offload import (
- FineGrainedActivationOffloadingInterface as off_interface,
- )
using_fused_tp_inference_kernel = (not self.training) and (
self.config.inference_fuse_tp_communication
@@ -881,10 +884,11 @@ def _forward_post_mlp(
nvtx_range_pop(suffix="mlp_bda")
# Delay the offload of the mlp norm until after the mlp_bda has been computed
# because the residual is needed in the mlp_bda.
- if self.offload_mlp_norm:
- hidden_states = off_interface.group_commit(
- hidden_states, name="mlp_norm", forced_released_tensors=[residual]
+ if self.mlp_norm_manager is not None:
+ hidden_states = self.mlp_norm_manager.group_offload(
+ hidden_states, forced_released_tensors=[residual]
)
+ self.mlp_norm_manager = None
# Jit compiled function creates 'view' tensor. This tensor
# potentially gets saved in the MPU checkpoint function context,
@@ -1039,6 +1043,18 @@ def _te_cuda_graph_capture(self, *args, **kwargs):
attribute can be set to control the scope of the CUDA graph.
2. If context is None, it cannot be returned as output.
"""
+ # Record the backward event on cuda graph stream in backward pass.
+ # This is to ensure the main stream waits for computing on cuda graph stream to complete,
+ # and overlaps with the H2D transfer on reload stream.
+ if self.offload_module_in_cuda_graph:
+ if len(args) > 0:
+ hidden_states = args[0]
+ hidden_states = self.off_interface.backward_record(hidden_states)
+ args = (hidden_states,) + args[1:]
+ else:
+ hidden_states = kwargs.pop("hidden_states")
+ hidden_states = self.off_interface.backward_record(hidden_states)
+ kwargs["hidden_states"] = hidden_states
context = None
if not self.config.cuda_graph_scope or CudaGraphScope.attn in self.config.cuda_graph_scope:
hidden_states, context = self._forward_attention(*args, **kwargs)
@@ -1066,6 +1082,11 @@ def _te_cuda_graph_capture(self, *args, **kwargs):
cuda_graph_outputs = list(hidden_states)
if context is not None:
cuda_graph_outputs.append(context)
+ # Record the forward event on cuda graph stream for cuda graph capture.
+ # This is to ensure the main stream waits for computing on cuda graph stream to complete,
+ # and overlaps with the D2H transfer on offloading stream.
+ if self.offload_module_in_cuda_graph:
+ self.off_interface.forward_record()
return tuple(cuda_graph_outputs)
def _te_cuda_graph_replay(self, *args, **kwargs):
@@ -1089,8 +1110,25 @@ def _te_cuda_graph_replay(self, *args, **kwargs):
"For inference cuda graph, please use cuda_graph_impl=local instead."
)
+ if self.config.delay_offload_until_cuda_graph:
+ self.off_interface.enter_replay()
+
+ try:
+ return self._te_cuda_graph_replay_impl(args, kwargs, context)
+ finally:
+ if self.config.delay_offload_until_cuda_graph:
+ self.off_interface.exit_replay()
+
+ def _te_cuda_graph_replay_impl(self, args, kwargs, context):
+ """Implementation of _te_cuda_graph_replay, separated for replay mode cleanup."""
cuda_graph_output = list(super()._te_cuda_graph_replay(*args, **kwargs))
+ # Flush delayed offload groups from previous layers after graph replay.
+ # The CPU is idle during the sync between graph replay and a2a comm,
+ # so we use that time to execute the delayed offload operations.
+ if self.config.delay_offload_until_cuda_graph:
+ self.off_interface.flush_delayed_groups()
+
if kwargs.get('context') is not None:
context = cuda_graph_output.pop()
@@ -1127,11 +1165,7 @@ def _te_cuda_graph_replay(self, *args, **kwargs):
valid_cudagraph_attrs
), f"attr_outputs: {len(attr_outputs)} != {len(valid_cudagraph_attrs)}"
for i, attr_name in enumerate(valid_cudagraph_attrs):
- hier_attr_name = attr_name.split('.')
- attr = self.mlp.token_dispatcher
- for name in hier_attr_name[:-1]:
- attr = getattr(attr, name)
- setattr(attr, hier_attr_name[-1], attr_outputs[i])
+ self.mlp.token_dispatcher.set_cudagraph_attr(attr_name, attr_outputs[i])
else:
# CUDA graph output is [hidden_states, probs, routing_map].
assert len(cuda_graph_output) == 3, (
@@ -1280,6 +1314,110 @@ def _should_call_local_cudagraph(self, *args, **kwargs):
return True
return False
+ def backward_dw_cudagraph(self, microbatch_idx):
+ """
+ CUDA Graph backward weight gradient computation for this layer.
+ """
+ cg_index = microbatch_idx % len(self.cuda_graphs)
+ if not hasattr(self.cuda_graphs[cg_index], 'backward_dw'):
+ return
+ self.cuda_graphs[cg_index].backward_dw()
+
+ def __call__(self, *args, **kwargs):
+ # Extract mhc_recompute_manager before CUDA graph manager processes kwargs,
+ # since CheckpointManager is not a CUDA-graph-supported type.
+ self._mhc_recompute_manager = kwargs.pop("mhc_recompute_manager", None)
+ kwargs.pop("is_last_layer_in_recompute_block", None)
+
+ if self._should_call_local_cudagraph(*args, **kwargs):
+ # Inference mode.
+ if kwargs.get('inference_context') is not None:
+ # dynamic_inference_decode_only is not a real argument to forward, it is only used
+ # to differentiate the cuda graph used for decode from the one used for non-decode
+ # inference.
+ kwargs["dynamic_inference_decode_only"] = kwargs[
+ 'inference_context'
+ ].is_decode_only()
+
+ return super().__call__(*args, **kwargs)
+
+ def _set_offload_modules(self):
+ """Set the offload modules for the transformer layer."""
+ if self.config.fine_grained_activation_offloading:
+ self.offload_attn_norm = "attn_norm" in self.config.offload_modules and not isinstance(
+ self.input_layernorm, IdentityOp
+ )
+ self.offload_qkv_linear = "qkv_linear" in self.config.offload_modules
+ self.offload_core_attn = "core_attn" in self.config.offload_modules
+ self.offload_attn_proj = "attn_proj" in self.config.offload_modules
+ self.offload_mlp_norm = "mlp_norm" in self.config.offload_modules and not isinstance(
+ self.pre_mlp_layernorm, IdentityOp
+ )
+ self.offload_expert_fc1 = "expert_fc1" in self.config.offload_modules
+ self.offload_moe_act = "moe_act" in self.config.offload_modules
+ else:
+ self.offload_attn_norm = False
+ self.offload_qkv_linear = False
+ self.offload_core_attn = False
+ self.offload_attn_proj = False
+ self.offload_mlp_norm = False
+ self.offload_expert_fc1 = False
+ self.offload_moe_act = False
+ # Check the compatibility of fine-grained activation offloading and cuda graph.
+ if self.config.fine_grained_activation_offloading:
+ if CudaGraphScope.attn in self.config.cuda_graph_scope:
+ self.offload_attn_norm = False
+ log_single_rank(
+ logger,
+ logging.WARNING,
+ "attn_norm offloading is not supported with attn cudagraph. "
+ "Disabling attn_norm offloading.",
+ )
+ mark_mlp_norm_offloading_not_supported = False
+ # For moe layer, mlp_norm offloading isn't supported with attn or moe_router cudagraph.
+ if self.is_moe_layer:
+ if (
+ CudaGraphScope.attn in self.config.cuda_graph_scope
+ or CudaGraphScope.moe_router in self.config.cuda_graph_scope
+ ):
+ mark_mlp_norm_offloading_not_supported = True
+ # For non-moe layer, mlp_norm is the boundary of attn or mlp cudagraph.
+ # The only case where mlp_norm offloading is supported is when whole layer is captured.
+ elif (
+ CudaGraphScope.attn in self.config.cuda_graph_scope
+ and CudaGraphScope.mlp not in self.config.cuda_graph_scope
+ ) or (
+ CudaGraphScope.attn not in self.config.cuda_graph_scope
+ and CudaGraphScope.mlp in self.config.cuda_graph_scope
+ ):
+ mark_mlp_norm_offloading_not_supported = True
+ if mark_mlp_norm_offloading_not_supported:
+ self.offload_mlp_norm = False
+ log_single_rank(
+ logger,
+ logging.WARNING,
+ "mlp_norm offloading is not supported with the current cudagraph scope. "
+ "Disabling mlp_norm offloading.",
+ )
+ # Set the offload module in cuda graph flag.
+ self.offload_module_in_cuda_graph = False
+ if CudaGraphScope.attn in self.config.cuda_graph_scope:
+ if self.offload_core_attn or self.offload_attn_proj or self.offload_qkv_linear:
+ self.offload_module_in_cuda_graph = True
+ if not self.is_moe_layer and CudaGraphScope.mlp in self.config.cuda_graph_scope:
+ if self.offload_mlp_norm:
+ self.offload_module_in_cuda_graph = True
+ if self.offload_module_in_cuda_graph:
+ assert is_torch_min_version(
+ "2.9.0a0"
+ ), "Offloading modules captured in cuda graph requires torch>=2.9.0."
+ assert is_te_min_version(
+ "2.14.0"
+ ), "Offloading modules captured in cuda graph requires TE>=2.14.0."
+ assert (
+ self.config.cuda_graph_warmup_steps > 0
+ ), "Fine-grained activation offloading needs cuda_graph_warmup_steps > 0."
+
def get_layer_norm_weights(self):
"""
Get the weights of all layernorms (attention and MLP) in the transformer layer.
@@ -1289,6 +1427,361 @@ def get_layer_norm_weights(self):
return
+class HyperConnectionTransformerLayer(TransformerLayer):
+ """A transformer layer with Manifold-Constrained Hyper-Connections (mHC).
+
+ Extends TransformerLayer by adding hyper connection modules around self-attention
+ and MLP. The n-stream hidden states are aggregated before each sub-layer and
+ expanded back afterwards using learned mappings (H_pre, H_post, H_res).
+
+ Cross-attention hyper connection is not supported.
+ """
+
+ def __init__(
+ self,
+ config: TransformerConfig,
+ submodules: TransformerLayerSubmodules,
+ layer_number: int = 1,
+ hidden_dropout: Optional[float] = None,
+ pg_collection: Optional[ProcessGroupCollection] = None,
+ vp_stage: Optional[int] = None,
+ ):
+ super().__init__(
+ config=config,
+ submodules=submodules,
+ layer_number=layer_number,
+ hidden_dropout=hidden_dropout,
+ pg_collection=pg_collection,
+ vp_stage=vp_stage,
+ )
+
+ if submodules.cross_attention_hyper_connection is not IdentityOp:
+ raise ValueError(
+ "HyperConnectionTransformerLayer does not support cross-attention "
+ "hyper connections. Use IdentityOp for cross_attention_hyper_connection."
+ )
+
+ assert submodules.self_attention_hyper_connection is not IdentityOp, (
+ "HyperConnectionTransformerLayer requires self_attention_hyper_connection. "
+ "Use TransformerLayer instead if hyper connections are not needed."
+ )
+ assert submodules.mlp_hyper_connection is not IdentityOp, (
+ "HyperConnectionTransformerLayer requires mlp_hyper_connection. "
+ "Use TransformerLayer instead if hyper connections are not needed."
+ )
+
+ self.self_attention_hyper_connection = build_module(
+ submodules.self_attention_hyper_connection,
+ config=self.config,
+ layer_number=self.layer_number,
+ )
+
+ self.mlp_hyper_connection = build_module(
+ submodules.mlp_hyper_connection, config=self.config, layer_number=self.layer_number
+ )
+
+ # When mHC recompute is active, skip checkpointing if the layernorm
+        # is IdentityOp (fused into TE linear) — there is nothing to recompute.
+ self.mhc_checkpoint_input_layernorm = not isinstance(self.input_layernorm, IdentityOp)
+ self.mhc_checkpoint_pre_mlp_layernorm = not isinstance(self.pre_mlp_layernorm, IdentityOp)
+
+ def get_layer_static_inputs(self, seq_length, micro_batch_size):
+ """Override to produce n-stream hidden_states of shape [s, b, n*C].
+
+ CUDA graph capture creates static buffers whose shapes are determined by
+ this method. The base class returns [s, b, C], but mHC layers operate on
+ n-stream hidden states of shape [s, b, n*C].
+ """
+ static_inputs = super().get_layer_static_inputs(seq_length, micro_batch_size)
+ hs = static_inputs["hidden_states"]
+ n = self.config.num_residual_streams
+ static_inputs["hidden_states"] = torch.ones(
+ (hs.shape[0], hs.shape[1], n * self.config.hidden_size),
+ dtype=hs.dtype,
+ requires_grad=hs.requires_grad,
+ device=hs.device,
+ )
+ return static_inputs
+
+ def _get_submodules_under_cudagraphs(self):
+ """Override to include hyper connection modules.
+
+ The base TransformerLayer._get_submodules_under_cudagraphs does not include
+ self_attention_hyper_connection / mlp_hyper_connection. Their learnable
+ parameters (mapping_proj, alpha_*, bias) need manual pre-forward hooks
+ during CUDA graph replay so that parameter all-gathers are triggered.
+ """
+ submodules = super()._get_submodules_under_cudagraphs()
+
+ if not self.config.cuda_graph_scope:
+ return submodules
+
+ if CudaGraphScope.attn in self.config.cuda_graph_scope:
+ submodules.append(self.self_attention_hyper_connection)
+ if (not self.is_moe_layer and CudaGraphScope.mlp in self.config.cuda_graph_scope) or (
+ self.is_moe_layer and CudaGraphScope.moe in self.config.cuda_graph_scope
+ ):
+ submodules.append(self.mlp_hyper_connection)
+ return submodules
+
+ def forward(self, *args, **kwargs):
+ """Forward pass with MHC recompute manager support."""
+ kwargs.pop("dynamic_inference_decode_only", None)
+
+ mhc_recompute_manager = getattr(self, '_mhc_recompute_manager', None)
+
+ hidden_states, context = self._forward_attention(
+ *args, mhc_recompute_manager=mhc_recompute_manager, **kwargs
+ )
+
+ output = self._forward_mlp(
+ hidden_states,
+ kwargs.get("inference_context", None),
+ padding_mask=kwargs.get("padding_mask", None),
+ mhc_recompute_manager=mhc_recompute_manager,
+ )
+ return output, context
+
+ def _forward_attention(
+ self,
+ hidden_states: Tensor,
+ attention_mask: Optional[Tensor] = None,
+ context: Optional[Tensor] = None,
+ context_mask: Optional[Tensor] = None,
+ rotary_pos_emb: Optional[Tensor] = None,
+ rotary_pos_cos: Optional[Tensor] = None,
+ rotary_pos_sin: Optional[Tensor] = None,
+ rotary_pos_cos_sin: Optional[Tensor] = None,
+ attention_bias: Optional[Tensor] = None,
+ inference_context: Optional[Any] = None,
+ packed_seq_params: Optional[PackedSeqParams] = None,
+ sequence_len_offset: Optional[Tensor] = None,
+ padding_mask: Optional[Tensor] = None,
+ mhc_recompute_manager: Optional['CheckpointManager'] = None,
+ *,
+ inference_params: Optional[Any] = None,
+ ):
+ """Forward attention with hyper connection pre/post processing on self-attention."""
+ inference_context = deprecate_inference_params(inference_context, inference_params)
+
+ residual = hidden_states
+
+ nvtx_range_push(suffix="self_attention_hyper_connection")
+ hidden_states, self_attn_h_res, self_attn_hc_h_post = self.self_attention_hyper_connection(
+ hidden_states, mhc_recompute_manager=mhc_recompute_manager
+ )
+ nvtx_range_pop(suffix="self_attention_hyper_connection")
+
+ # Optional Input Layer norm
+ checkpoint_input_layernorm = self.recompute_input_layernorm or (
+ mhc_recompute_manager is not None and self.mhc_checkpoint_input_layernorm
+ )
+ attn_norm_manager = self.off_interface(self.offload_attn_norm, hidden_states, "attn_norm")
+ if checkpoint_input_layernorm:
+ self.input_layernorm_checkpoint = tensor_parallel.CheckpointWithoutOutput(
+ ckpt_manager=mhc_recompute_manager
+ )
+ with attn_norm_manager as hidden_states:
+ input_layernorm_output = self.input_layernorm_checkpoint.checkpoint(
+ self.input_layernorm, hidden_states
+ )
+ else:
+ with attn_norm_manager as hidden_states:
+ input_layernorm_output = self.input_layernorm(hidden_states)
+
+ # Self attention.
+ nvtx_range_push(suffix="self_attention")
+ attention_output_with_bias = self.self_attention(
+ input_layernorm_output,
+ attention_mask=attention_mask,
+ inference_context=inference_context,
+ rotary_pos_emb=rotary_pos_emb,
+ rotary_pos_cos=rotary_pos_cos,
+ rotary_pos_sin=rotary_pos_sin,
+ rotary_pos_cos_sin=rotary_pos_cos_sin,
+ attention_bias=attention_bias,
+ packed_seq_params=packed_seq_params,
+ sequence_len_offset=sequence_len_offset,
+ )
+ nvtx_range_pop(suffix="self_attention")
+
+ if checkpoint_input_layernorm:
+ self.input_layernorm_checkpoint.discard_output_and_register_recompute(
+ attention_output_with_bias[0]
+ )
+
+ nvtx_range_push(suffix="self_attention_fused_h_res_h_post_bda")
+ with self.bias_dropout_add_exec_handler():
+ hidden_states = self.self_attention_hyper_connection.fused_h_res_h_post_bda(
+ self_attn_h_res,
+ residual,
+ self_attn_hc_h_post,
+ attention_output_with_bias,
+ self.hidden_dropout,
+ self.training,
+ self.config.bias_dropout_fusion,
+ mhc_recompute_manager,
+ )
+ nvtx_range_pop(suffix="self_attention_fused_h_res_h_post_bda")
+
+ hidden_states = attn_norm_manager.group_offload(hidden_states)
+
+ # Cross-attention (no hyper connection support).
+ residual = hidden_states
+ pre_cross_attn_layernorm_output = self.pre_cross_attn_layernorm(hidden_states)
+
+ attention_output_with_bias = self.cross_attention(
+ pre_cross_attn_layernorm_output,
+ attention_mask=context_mask,
+ key_value_states=context,
+ inference_context=inference_context,
+ )
+
+ if isinstance(attention_output_with_bias, dict) and "context" in attention_output_with_bias:
+ context = attention_output_with_bias["context"]
+
+ with self.bias_dropout_add_exec_handler():
+ hidden_states = self.cross_attn_bda(self.training, self.config.bias_dropout_fusion)(
+ attention_output_with_bias, residual, self.hidden_dropout
+ )
+
+ return hidden_states, context
+
+ def _forward_mlp(
+ self,
+ hidden_states,
+ inference_context=None,
+ padding_mask=None,
+ mhc_recompute_manager: Optional['CheckpointManager'] = None,
+ ):
+ """Forward MLP with hyper connection pre/post processing."""
+ is_last_in_recompute_block = bool(
+ mhc_recompute_manager is not None
+ and getattr(mhc_recompute_manager, "is_last_layer_in_recompute_block", False)
+ )
+ mhc_mlp_bda_manager = None if is_last_in_recompute_block else mhc_recompute_manager
+
+ residual = hidden_states
+
+ nvtx_range_push(suffix="mlp_hyper_connection")
+ hidden_states, mlp_h_res, mlp_hc_h_post = self.mlp_hyper_connection(
+ hidden_states, mhc_recompute_manager=mhc_recompute_manager
+ )
+ nvtx_range_pop(suffix="mlp_hyper_connection")
+
+ # Optional Layer norm post the cross-attention.
+ checkpoint_pre_mlp_layernorm = self.recompute_pre_mlp_layernorm or (
+ mhc_recompute_manager is not None and self.mhc_checkpoint_pre_mlp_layernorm
+ )
+ self.mlp_norm_manager = self.off_interface(self.offload_mlp_norm, hidden_states, "mlp_norm")
+ if checkpoint_pre_mlp_layernorm:
+ self.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput(
+ ckpt_manager=mhc_recompute_manager
+ )
+ with self.mlp_norm_manager as hidden_states:
+ pre_mlp_layernorm_output = self.pre_mlp_norm_checkpoint.checkpoint(
+ self.pre_mlp_layernorm, hidden_states
+ )
+ else:
+ with self.mlp_norm_manager as hidden_states:
+ pre_mlp_layernorm_output = self.pre_mlp_layernorm(hidden_states)
+
+ nvtx_range_push(suffix="mlp")
+ should_chunk_mlp_for_prefill = (
+ self.config.mlp_chunks_for_prefill > 1
+ and inference_context is not None
+ and not inference_context.is_decode_only()
+ and not isinstance(self.mlp, IdentityOp)
+ and not self.config.transformer_impl == "inference_optimized"
+ )
+
+ if self.recompute_mlp:
+ if self.config.fp8 or self.config.fp4:
+ from megatron.core.extensions.transformer_engine import te_checkpoint
+
+ mlp_output_with_bias = te_checkpoint(
+ self.mlp,
+ False,
+ tensor_parallel.random.get_cuda_rng_tracker,
+ self.pg_collection.tp,
+ pre_mlp_layernorm_output,
+ padding_mask=padding_mask,
+ )
+ else:
+ mlp_output_with_bias = tensor_parallel.checkpoint(
+ functools.partial(self.mlp, padding_mask=padding_mask),
+ False,
+ pre_mlp_layernorm_output,
+ )
+ elif should_chunk_mlp_for_prefill:
+ num_chunks = min(self.config.mlp_chunks_for_prefill, pre_mlp_layernorm_output.shape[0])
+ chunks = pre_mlp_layernorm_output.chunk(num_chunks, dim=0)
+ outputs = [self.mlp(chunk) for chunk in chunks]
+ mlp_output = torch.cat([out for out, _ in outputs], dim=0)
+ bias_chunks = [bias for _, bias in outputs if bias is not None]
+ bias_output = torch.stack(bias_chunks, dim=0).sum(dim=0) if bias_chunks else None
+ mlp_output_with_bias = (mlp_output, bias_output)
+ else:
+ mlp_output_with_bias = self.mlp(pre_mlp_layernorm_output, padding_mask=padding_mask)
+
+ nvtx_range_pop(suffix="mlp")
+
+ return self._forward_post_mlp_with_fused_hyper_connection(
+ mlp_output_with_bias, mlp_h_res, residual, mlp_hc_h_post, mhc_mlp_bda_manager
+ )
+
+ def _forward_post_mlp_with_fused_hyper_connection(
+ self,
+ mlp_output_with_bias,
+ mlp_h_res,
+ residual,
+ mlp_hc_h_post,
+ mhc_mlp_bda_recompute_manager: Optional['CheckpointManager'] = None,
+ ):
+ """
+ Perform operations after the MLP computation with fused hyper connection kernel.
+
+ This method uses the fused kernel combining apply_h_res, apply_h_post and bias-dropout-add.
+
+ Args:
+ mlp_output_with_bias (Tensor): Output tensor of the MLP layer with bias.
+ mlp_h_res (Tensor): [s, b, n, n] - residual mixing matrix from hyper connection.
+ residual (Tensor): [s, b, n*C] - original residual (n-stream hidden states).
+ mlp_hc_h_post (Tensor): [s, b, n] - expansion weights from hyper connection.
+ mhc_recompute_manager: Optional CheckpointManager for checkpoint management.
+
+ Returns:
+ output (Tensor): Transformed hidden states of shape [s, b, h].
+ """
+ if self.recompute_pre_mlp_layernorm or (
+ mhc_mlp_bda_recompute_manager is not None and self.mhc_checkpoint_pre_mlp_layernorm
+ ):
+ self.pre_mlp_norm_checkpoint.discard_output_and_register_recompute(
+ mlp_output_with_bias[0]
+ )
+
+ nvtx_range_push(suffix="mlp_fused_h_res_h_post_bda")
+ with self.bias_dropout_add_exec_handler():
+ hidden_states = self.mlp_hyper_connection.fused_h_res_h_post_bda(
+ mlp_h_res,
+ residual,
+ mlp_hc_h_post,
+ mlp_output_with_bias,
+ self.hidden_dropout,
+ self.training,
+ self.config.bias_dropout_fusion,
+ mhc_mlp_bda_recompute_manager,
+ )
+ nvtx_range_pop(suffix="mlp_fused_h_res_h_post_bda")
+
+ hidden_states = self.mlp_norm_manager.group_offload(hidden_states)
+
+ output = make_viewless_tensor(
+ inp=hidden_states, requires_grad=hidden_states.requires_grad, keep_graph=True
+ )
+ return output
+
+
class MoETransformerLayer(TransformerLayer):
"""
A Transformer layer specialized for Mixture-of-Experts (MoE) architectures.
@@ -1412,10 +1905,7 @@ def _forward_mlp_router(self, hidden_states, padding_mask=None):
)
for attr_name in self.mlp.token_dispatcher.cudagraph_attrs:
- hier_attr_name = attr_name.split('.')
- attr = self.mlp.token_dispatcher
- for name in hier_attr_name:
- attr = getattr(attr, name)
+ attr = self.mlp.token_dispatcher.get_cudagraph_attr(attr_name)
if torch.is_tensor(attr):
if attr_name in self.token_dispatcher_attrs:
self.token_dispatcher_attrs[attr_name].copy_(attr)
@@ -1433,12 +1923,8 @@ def _forward_mlp_expert_compute(self, hidden_states, probs):
step runs eagerly between the router and postprocess graph replays.
"""
- for attr_name, attr in self.token_dispatcher_attrs.items():
- hier_attr_name = attr_name.split('.')
- obj = self.mlp.token_dispatcher
- for name in hier_attr_name[:-1]:
- obj = getattr(obj, name)
- setattr(obj, hier_attr_name[-1], attr)
+ for name, attr in self.token_dispatcher_attrs.items():
+ self.mlp.token_dispatcher.set_cudagraph_attr(name, attr)
self.mlp.fwd_execution_map = "expert_compute"
return self.mlp(None, intermediate_tensors=(hidden_states, probs))
diff --git a/megatron/core/utils.py b/megatron/core/utils.py
index 58e86e09247..a0b77eb4052 100644
--- a/megatron/core/utils.py
+++ b/megatron/core/utils.py
@@ -922,6 +922,12 @@ def make_tp_sharded_tensor_for_checkpoint(
# Pop group parameters from kwargs
tp_group = kwargs.pop('tp_group', None)
dp_cp_group = kwargs.pop('dp_cp_group', None)
+ # If there are any additional kwargs left, surface them for visibility
+ # (these will be forwarded to ShardedTensor.from_rank_offsets).
+ if kwargs:
+ logger.warning(
+ "make_tp_sharded_tensor_for_checkpoint received extra kwargs: %s", list(kwargs.keys())
+ )
prepend_axis_num = len(prepend_offsets)
@@ -987,6 +993,12 @@ def make_sharded_tensor_for_checkpoint(tensor, key, prepend_offsets=(), replica_
# Pop group parameters from kwargs
tp_group = kwargs.pop('tp_group', None)
dp_cp_group = kwargs.pop('dp_cp_group', None)
+ # If there are any additional kwargs left, surface them for visibility
+ # (these will be forwarded to ShardedTensor.from_rank_offsets).
+ if kwargs:
+ logger.warning(
+ "make_sharded_tensor_for_checkpoint received extra kwargs: %s", list(kwargs.keys())
+ )
prepend_axis_num = len(prepend_offsets)
@@ -2057,11 +2069,11 @@ def get_thd_batch_on_this_cp_rank(
################################
-### hybrid context parallel ###
+### dynamic context parallel ###
################################
-def get_batch_on_this_hybrid_cp_rank(
+def get_batch_on_this_dynamic_cp_rank(
batch: Dict[str, Any],
local_cp_size: int,
cp_group: Optional[torch.distributed.ProcessGroup] = None,
@@ -2071,18 +2083,15 @@ def get_batch_on_this_hybrid_cp_rank(
"""
assert local_cp_size is not None
if cp_group is None:
- # Get the local cp group required for as defined by the HybridCPDataLoaderWrapper
- if local_cp_size > 1:
- cp_group = parallel_state.get_hybrid_data_context_parallel_groups(
- group_size=local_cp_size
- )
+        # Get the local cp group as defined by the DynamicCPDataLoaderWrapper
+ cp_group = parallel_state.get_dynamic_data_context_parallel_groups(group_size=local_cp_size)
else:
# If cp group is provided, it must match the local cp size
- # as defined by the HybridCPDataLoaderWrapper
+ # as defined by the DynamicCPDataLoaderWrapper
assert cp_group.size() == local_cp_size
# Convert [seqlen] to [1, seqlen] similar to default collate_fn
- # as hybrid_context_parallel dataloader wrapper does not go through default collate_fn
+ # as dynamic_context_parallel dataloader wrapper does not go through default collate_fn
for key, data in batch.items():
if key in ['attention_mask']:
continue
@@ -2102,8 +2111,8 @@ def get_batch_on_this_hybrid_cp_rank(
cp_group=cp_group,
)
- if cp_group is not None and cp_group.size() > 1:
- # When using hybrid_context_parallel, each sub-sample of a packed sample is
+ if cp_group.size() > 1:
+ # When using dynamic_context_parallel, each sub-sample of a packed sample is
# required to be divisible by CP*DP*2 or CP*DP*TP*2 (if using sequence parallel)
batch = get_batch_on_this_cp_rank(batch, cp_group=cp_group)
diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py
index d1b5c34c619..b3b10d7e8bb 100644
--- a/megatron/training/arguments.py
+++ b/megatron/training/arguments.py
@@ -965,14 +965,13 @@ def validate_args(args, defaults={}):
assert args.ckpt_format == "fsdp_dtensor", \
"Megatron FSDP only supports fsdp_dtensor checkpoint format"
-
+
+ args.reuse_grad_buf_for_mxfp8_param_ag = False
+
if args.fsdp_manual_registration:
assert args.use_megatron_fsdp, "FSDP manual registration is only supported with Megatron FSDP"
assert args.nccl_ub, "FSDP manual registration is only supported with nccl-ub option"
- if args.use_megatron_fsdp:
- args.reuse_grad_buf_for_mxfp8_param_ag = False
-
# Parameters dtype.
args.params_dtype = torch.float
if args.fp16:
@@ -1030,13 +1029,6 @@ def validate_args(args, defaults={}):
if args.rl_use_sequence_packing:
args.consumed_train_bins = 0
- # Support for variable sequence lengths across batches/microbatches.
- # set it if the dataloader supports generation of variable sequence lengths
- # across batches/microbatches. Due to additional communication overhead
- # during pipeline parallelism, it should not be set if sequence length
- # is constant during training.
- args.variable_seq_lengths = False
-
# Iteration-based training.
# Skip these checks when skip_train is set: LR config is irrelevant.
if args.train_iters and not args.skip_train:
@@ -1201,12 +1193,17 @@ def validate_args(args, defaults={}):
if args.tp_comm_overlap:
assert args.sequence_parallel == True, 'Tensor parallel communication/GEMM overlap can happen only when sequence parallelism is enabled'
- if args.hybrid_context_parallel:
- assert not args.pipeline_model_parallel_size > 1, 'Hybrid context parallelism not supported with pipeline parallelism'
- assert not args.enable_cuda_graph, 'Hybrid context parallelism not supported with CUDA Graph'
- assert not args.use_megatron_fsdp, 'Hybrid context parallelism not supported with Megatron FSDP'
- assert args.dataloader_type == 'single', 'Hybrid context parallelism only supported with single dataloader type'
- assert args.calculate_per_token_loss, 'Hybrid context parallelism must be used with --calculate-per-token-loss'
+ if args.dynamic_context_parallel:
+ assert not args.pipeline_model_parallel_size > 1, 'Dynamic context parallelism not supported with pipeline parallelism'
+ assert not args.enable_cuda_graph, 'Dynamic context parallelism not supported with CUDA Graph'
+ assert not args.use_megatron_fsdp, 'Dynamic context parallelism not supported with Megatron FSDP'
+ assert args.dataloader_type == 'single', 'Dynamic context parallelism only supported with single dataloader type'
+ assert args.calculate_per_token_loss, 'Dynamic context parallelism must be used with --calculate-per-token-loss'
+
+ if args.sequence_packing_scheduler is not None:
+ assert args.context_parallel_size * args.max_seqlen_per_dp_cp_rank >= args.seq_length, \
+ f'Packed sequence buffer size ({args.context_parallel_size * args.max_seqlen_per_dp_cp_rank}) ' \
+ f'must be >= single sequence max length ({args.seq_length})'
# disable async_tensor_model_parallel_allreduce when
# model parallel memory optimization is enabled
@@ -1472,17 +1469,26 @@ def validate_args(args, defaults={}):
'--no-load-optim with --skip-train --perform-rl-step skips the optimizer; ' \
'--rl-offload-optimizer-during-inference is incompatible (no optimizer to offload).'
- # Muon optimizer check
- if 'muon' in args.optimizer:
+    # Emerging-optimizer checks (anything other than sgd/adam)
+ if args.optimizer not in ('sgd', 'adam'):
+ if args.optimizer == 'dist_muon':
+ warn_rank_0(
+ "optimizer='dist_muon' is deprecated. "
+ "Use --optimizer muon --use-distributed-optimizer instead."
+ )
+ args.optimizer = 'muon'
+ args.use_layer_wise_distributed_optimizer = True
- if args.optimizer == 'muon':
- assert not args.overlap_grad_reduce, "Muon optimizer does not support overlap grad reduce. Use dist_muon instead."
- assert not args.overlap_param_gather, "Muon optimizer does not support overlap param gather. Use dist_muon instead."
+ if args.use_distributed_optimizer:
+ args.use_layer_wise_distributed_optimizer = True
+ args.use_distributed_optimizer = False
assert not args.use_distributed_optimizer, "Muon optimizer does not support distributed optimizer for now."
assert not args.use_torch_fsdp2, "Muon optimizer does not support Torch-FSDP2 for now."
assert not args.use_megatron_fsdp, "Muon optimizer does not support Megatron-FSDP for now."
assert args.ckpt_format in ["torch", "torch_dist"], "Muon optimizer supports torch and torch_dist checkpoint format."
+ assert args.experimental_attention_variant is None, "Muon optimizer does not support attention variant for now."
+ assert not args.attention_output_gate, "Muon optimizer does not support attention output gate for now."
# Optimizer CPU offload check
if args.optimizer_cpu_offload:
@@ -1495,6 +1501,11 @@ def validate_args(args, defaults={}):
"must be used in conjunction with `--fp8-recipe delayed`."
)
+ if args.offload_optimizer_states:
+ assert args.use_distributed_optimizer, "offload_optimizer_states is only supported with distributed optimizer"
+ assert args.optimizer == 'adam', "offload_optimizer_states is only supported with adam optimizer"
+ assert not args.use_megatron_fsdp, "offload_optimizer_states does not support Megatron-FSDP for now."
+
if args.non_persistent_ckpt_type == "local":
assert args.non_persistent_local_ckpt_dir is not None, "Tried to use local checkpointing without specifying --local-ckpt-dir!"
if args.replication:
@@ -2221,7 +2232,7 @@ def _add_regularization_args(parser):
group.add_argument('--muon-no-split-qkv', action='store_false', default=True,
dest='muon_split_qkv',
help='Whether to split QKV parameters for Muon optimizer')
- group.add_argument('--muon-use-nesterov', action='store_true',
+ group.add_argument('--muon-nesterov', action='store_true',
help='Whether to use Nesterov-style momentum in the internal SGD')
group.add_argument('--muon-scale-mode', type=str, default='spectral',
choices=['spectral', 'unit_rms_norm', 'shape_scaling'],
@@ -2464,8 +2475,10 @@ def _add_training_args(parser):
help='use FlashAttention implementation of attention. '
'https://arxiv.org/abs/2205.14135')
group.add_argument('--optimizer', type=str, default='adam',
- choices=['adam', 'sgd', 'muon', 'dist_muon', 'lion'],
- help='Optimizer function')
+                       choices=['adam', 'sgd', 'muon', 'dist_muon', 'soap', 'adaptive_muon', 'lion'],
+ help='Optimizer function. '
+ 'Note: dist_muon is deprecated; use --optimizer muon '
+ 'with --use-distributed-optimizer instead.')
group.add_argument('--optimizer-cpu-offload', action='store_true',
help='Offload optimizer state to CPU')
group.add_argument('--optimizer-offload-fraction', type=float, default=1.0,
@@ -2482,6 +2495,14 @@ def _add_training_args(parser):
help='Disable pinning of CPU memory for gradients.')
group.add_argument('--no-pin-cpu-params', action='store_false', dest='pin_cpu_params',
help='Disable pinning of CPU memory for parameters.')
+ group.add_argument('--offload-optimizer-states',
+ action='store_true',
+ dest='offload_optimizer_states',
+ help='Offload optimizer states to CPU after each optimizer step and '
+ 'reload them before the next optimizer step. '
+                            'Only supports the TE FusedAdam optimizer. '
+ 'Note that this still uses pure GPU optimizer instead of '
+ 'HybridDeviceOptimizer for --optimizer-cpu-offload.')
group.add_argument('--dataloader-type', type=str, default=None,
choices=['single', 'cyclic', 'external'],
help='Single pass vs multiple pass data loader')
@@ -3289,4 +3310,8 @@ def _add_sft_args(parser):
group.add_argument('--sft', action="store_true", help='Megatron SFT training')
group.add_argument('--sft-tokenizer-prompt-format', type=str, default="nemotron-h-aligned",
help='SFT prompt format.')
+ group.add_argument('--sft-mock-dataset-config-json', type=str, default=None,
+ help='This config provides the necessary information for the mock dataset. You can either specify a CSV file that contains sequence lengths, where each line stores the length of a sequence, for example: {"mode":"file","path":"/path/to/file"}. Alternatively, you can specify a distribution (currently only supporting lognormal distribution) along with the required parameters, for example, {"mode":"distribution","type":"lognormal","min_seq_len":1024,"max_seq_len":2048,"mean_seq_len":1536,"lognormal_sigma":1.1}, where sigma controls the variability of the lognormal distribution. '
+ 'If not specified and --mock-data is set, defaults to a lognormal distribution with '
+ 'min_seq_len=seq_length//2, max_seq_len=seq_length, mean_seq_len=seq_length*3//4, lognormal_sigma=1.1.')
return parser
diff --git a/megatron/training/checkpointing.py b/megatron/training/checkpointing.py
index 86cd9421ed0..ec4c0fa8e75 100644
--- a/megatron/training/checkpointing.py
+++ b/megatron/training/checkpointing.py
@@ -569,9 +569,9 @@ def save_checkpoint(iteration, model, optimizer, opt_param_scheduler, num_floati
ensure_directory_exists(optim_checkpoint_name)
if not optimizer.is_stub_optimizer:
optimizer.save_parameter_state(optim_checkpoint_name)
-
+
# LayerWiseDistributedOptimizer save optimizer state to file on different ranks
- if getattr(args, "optimizer", "adam").startswith("dist_") and args.ckpt_format == 'torch':
+ if getattr(args, "use_layer_wise_distributed_optimizer", False) and args.ckpt_format == 'torch':
dp_rank = mpu.get_data_parallel_rank()
optim_checkpoint_name = os.path.join(os.path.dirname(checkpoint_name), f"layer_wise_optimizer_{dp_rank}.pt")
ensure_directory_exists(optim_checkpoint_name)
@@ -1864,7 +1864,7 @@ def load_model_state_dict(module, state_dict, strict: bool):
if not release and not args.finetune and not args.no_load_optim:
try:
# Load state dict.
- if getattr(args, "optimizer", "adam").startswith("dist_") and args.ckpt_format == 'torch':
+ if getattr(args, "use_layer_wise_distributed_optimizer", False) and args.ckpt_format == 'torch':
# LayerWiseDistributedOptimizer load optimizer state from file on different ranks
dp_rank = mpu.get_data_parallel_rank()
optim_checkpoint_name = os.path.join(os.path.dirname(checkpoint_name), f"layer_wise_optimizer_{dp_rank}.pt")
diff --git a/megatron/training/datasets/data_samplers.py b/megatron/training/datasets/data_samplers.py
index 0e48b3f0865..80b6f8ea378 100644
--- a/megatron/training/datasets/data_samplers.py
+++ b/megatron/training/datasets/data_samplers.py
@@ -39,8 +39,8 @@ def build_pretraining_data_loader(dataset, consumed_samples):
data_parallel_size=mpu.get_data_parallel_world_size(),
)
elif args.dataloader_type == 'single':
- if args.hybrid_context_parallel:
- batch_sampler = HybridCPMegatronPretrainingSampler(
+ if args.dynamic_context_parallel:
+ batch_sampler = DynamicCPMegatronPretrainingSampler(
total_samples=len(dataset),
consumed_samples=consumed_samples,
micro_batch_size=args.micro_batch_size,
@@ -95,7 +95,7 @@ def close_nvidia_fds():
worker_init_fn if args.num_workers > 0 else None
)
# Torch dataloader.
- if args.hybrid_context_parallel:
+ if args.dynamic_context_parallel:
extra_kwargs = {"collate_fn": lambda x: x,}
else:
extra_kwargs = {}
@@ -178,11 +178,11 @@ def __iter__(self):
start_idx, end_idx = self.get_start_end_idx()
yield batch[start_idx:end_idx]
-class HybridCPMegatronPretrainingSampler(MegatronPretrainingSampler):
+class DynamicCPMegatronPretrainingSampler(MegatronPretrainingSampler):
"""
- Data sampler for hybrid context parallel (Hybrid CP) format.
+ Data sampler for dynamic context parallel (Dynamic CP) format.
This data sampler pulls in the entire global batch at once across all data parallel ranks.
- This helps provide the Hybrid CP Dataloader Wrapper to schedule and load balance sub-samples
+ This helps provide the Dynamic CP Dataloader Wrapper to schedule and load balance sub-samples
of the entire global batch.
"""
diff --git a/megatron/training/datasets/sft_dataset.py b/megatron/training/datasets/sft_dataset.py
index 9de5d2a52fe..250a0137568 100644
--- a/megatron/training/datasets/sft_dataset.py
+++ b/megatron/training/datasets/sft_dataset.py
@@ -2,12 +2,16 @@
import atexit, json
from collections import Counter
-from typing import Any, Dict, Optional
+import json
+import math
+from typing import Any, Dict, Optional, List, Union
import numpy as np
+import pandas as pd
import torch
from megatron.core.datasets.gpt_dataset import GPTDatasetConfig
+from megatron.core.datasets.indexed_dataset import IndexedDataset
from megatron.core.datasets.megatron_dataset import LowLevelDataset, MegatronDataset
from megatron.core.datasets.utils import Split
@@ -88,6 +92,26 @@ def _split_conversations(self, merged_conversations):
split_conversations.append(current)
return split_conversations
+ def _calculate_padding_divisor(self) -> int:
+ """
+ Calculate the divisor used for sequence padding.
+        tp_pad = sequence_parallel_size if sequence_parallel_size > 0 else 1
+        cp_pad = dp_size * cp_size * 2                     (if dynamic_cp)
+        cp_pad = cp_size * 2 if cp_size > 1 else 1         (otherwise)
+        divisor = cp_pad * tp_pad
+ """
+ if self.config.dynamic_context_parallel:
+ # Dynamic CP: consider both CP and DP
+ cp_pad = self.config.data_parallel_size * self.config.context_parallel_size * 2
+ else:
+ # Standard CP: only consider CP
+ cp_pad = self.config.context_parallel_size * 2 if self.config.context_parallel_size > 1 else 1
+ tp_pad = self.config.sequence_parallel_size if self.config.sequence_parallel_size > 0 else 1
+ divisor = cp_pad * tp_pad
+ # TODO(tailaim): do we need to pad for FP8 execution?
+ # divisor = ((divisor + 15) // 16) * 16
+ return divisor
+
def __getitem__(self, idx: int) -> Dict[str, Any]:
tokenizer = self.config.tokenizer
@@ -124,12 +148,11 @@ def extend_with_padding(tokens, targets, positions, pad_len):
assert not self.config.reset_position_ids
pack_positions.extend(range(len(tokens_list)))
- if self.config.context_parallel_size > 1:
- pad_granularity = self.config.context_parallel_size * 2
- mod_token_count = len(pack_tokens) % pad_granularity
- if mod_token_count != 0:
- pad_len = pad_granularity - mod_token_count
- extend_with_padding(pack_tokens, pack_targets, pack_positions, pad_len)
+ pad_granularity = self._calculate_padding_divisor()
+ mod_token_count = len(pack_tokens) % pad_granularity
+ if mod_token_count != 0:
+ pad_len = pad_granularity - mod_token_count
+ extend_with_padding(pack_tokens, pack_targets, pack_positions, pad_len)
# TODO(duncan): Consider also padding to multiple of number of tokens here. This might
# be needed for efficiency (and potentially set via command-line argument).
@@ -190,3 +213,214 @@ def extend_with_padding(tokens, targets, positions, pad_len):
'cu_seqlens': cu_seqlens,
'max_seqlen': max_seqlen,
}
+
+
+class MockSFTLowLevelDataset:
+ """The low-level mock dataset for SFT
+
+ Args:
+ mode (str): One of 'file', 'distribution', or 'verification'.
+ **kwargs: Additional arguments depending on mode.
+ For mode='file': path (str) - path to a CSV file with sequence lengths.
+ For mode='distribution': type (str), min_seq_len (int), max_seq_len (int),
+ mean_seq_len (int), and distribution-specific params (e.g. lognormal_sigma).
+ For mode='verification': data_path (str) - prefix path to an IndexedDataset
+ (.bin/.idx files). Optional lognormal distribution params same as
+ 'distribution' mode (defaults: min_seq_len=100, max_seq_len=4096,
+ mean_seq_len=2048, lognormal_sigma=1.1).
+ format (str): Output format for MockSFTDataset. Either 'thd' (default, sequence
+ packing with cu_seqlens) or 'sbhd' (padded to seq_length, no cu_seqlens).
+ """
+
+ seed: int = 0
+ """The hard-coded random seed to use to set the NumPy RNG"""
+
+ size: int = 1000000
+ """The hard-coded number of sequence to generate"""
+
+ def __init__(self, mode: str, **kwargs) -> None:
+ np.random.seed(self.seed)
+ self.format = kwargs.get("format", "thd")
+
+ if mode == "file":
+ self.sequence_lengths = np.array(pd.read_csv(kwargs["path"])).flatten()
+ self.size = len(self.sequence_lengths)
+ elif mode == "distribution":
+ min_seq_len = kwargs["min_seq_len"]
+ max_seq_len = kwargs["max_seq_len"]
+ mean_seq_len = kwargs["mean_seq_len"]
+ if kwargs["type"] == "lognormal":
+ lognormal_sigma = kwargs["lognormal_sigma"]
+ self.sequence_lengths = self.generate_lognormal_samples(
+ self.size, mean_seq_len, lognormal_sigma, min_seq_len, max_seq_len
+ )
+ else:
+ raise ValueError(f"Unsupported distribution type {kwargs['type']}")
+ elif mode == "verification":
+ # Load real tokens from an IndexedDataset for realistic loss curves.
+ # Sequence lengths are drawn from a lognormal distribution (same as
+ # "distribution" mode) to allow controlled comparison of THD vs SBHD.
+ self.indexed_dataset = IndexedDataset(kwargs["data_path"])
+ min_seq_len = kwargs.get("min_seq_len", 100)
+ max_seq_len = kwargs.get("max_seq_len", 4096)
+ mean_seq_len = kwargs.get("mean_seq_len", 2048)
+ lognormal_sigma = kwargs.get("lognormal_sigma", 1.1)
+ self.sequence_lengths = self.generate_lognormal_samples(
+ self.size, mean_seq_len, lognormal_sigma, min_seq_len, max_seq_len
+ )
+ else:
+ raise ValueError(f"Unsupported mode '{mode}', must be 'file', 'distribution', or 'verification'")
+
+ def generate_lognormal_samples(self, size, mean, sigma, min_seq_len, max_seq_len):
+ mu = np.log(mean) - sigma**2 / 2
+ samples = np.random.lognormal(mu, sigma, size)
+ samples = np.clip(samples, min_seq_len, max_seq_len)
+ return samples.astype(int)
+
+ def __len__(self) -> int:
+ return self.size
+
+ def __getitem__(self, idx: int) -> np.ndarray:
+ # The returned sample has 'length-1' tokens; an EOD token is appended
+ # later in MockSFTDataset.__getitem__, making the total 'length' tokens.
+ length = int(self.sequence_lengths[idx % self.size])
+ if hasattr(self, 'indexed_dataset'):
+ target = length - 1
+ num_docs = len(self.indexed_dataset)
+ doc_idx = idx % num_docs
+ raw = self.indexed_dataset[doc_idx]
+ if len(raw) >= target:
+ sample = raw[:target]
+ else:
+ # Concatenate documents until we reach the target length.
+ chunks = [raw]
+ total = len(raw)
+ next_doc = doc_idx + 1
+ while total < target:
+ raw_next = self.indexed_dataset[next_doc % num_docs]
+ need = target - total
+ chunks.append(raw_next[:need])
+ total += min(len(raw_next), need)
+ next_doc += 1
+ sample = np.concatenate(chunks)[:target]
+ assert len(sample) == target
+ return sample.astype(np.int64)
+ else:
+ return np.arange(1, length, dtype=np.int64)
+
+
+class MockSFTDataset(SFTDataset):
+ """The mock dataset used during SFT"""
+
+ def __init__(
+ self,
+ dataset: LowLevelDataset,
+ dataset_path: Optional[str],
+ indices: np.ndarray,
+ num_samples: Optional[int],
+ index_split: Split,
+ config: GPTDatasetConfig,
+ ) -> None:
+ super().__init__(dataset, dataset_path, indices, num_samples, index_split, config)
+
+ @staticmethod
+ def build_low_level_dataset(dataset_path: str, config: GPTDatasetConfig) -> LowLevelDataset:
+ if config.sft_mock_dataset_config_json is None:
+ mock_config = {
+ "mode": "distribution",
+ "type": "lognormal",
+ "min_seq_len": config.sequence_length // 2,
+ "max_seq_len": config.sequence_length,
+ "mean_seq_len": config.sequence_length // 4 * 3,
+ "lognormal_sigma": 1.1,
+ }
+ else:
+ mock_config = json.loads(config.sft_mock_dataset_config_json)
+ return MockSFTLowLevelDataset(**mock_config)
+
+ def __len__(self) -> int:
+ return self.num_samples
+
+ def __getitem__(self, idx: int) -> Dict[str, Any]:
+
+ tokenizer = self.config.tokenizer
+ pack_length = self.config.sequence_length
+ eod = tokenizer.eod
+ pad = tokenizer.pad
+
+ tokens = self.dataset[int(self.indices[idx % len(self.indices)])]
+
+ # Convert tokens to list and always append EOD to ensure length consistency.
+ # The low-level dataset returns length-1 tokens, and we add EOD to make it length tokens.
+ tokens_list = tokens.tolist()
+ tokens_list.append(eod)
+
+ if self.dataset.format == "sbhd":
+ # SBHD format: single padded sequence without cu_seqlens.
+ # Long sequences are truncated to pack_length tokens (including EOD).
+ if len(tokens_list) >= pack_length + 1:
+ tokens_list = tokens_list[:pack_length - 1] + [eod]
+ # Pad to pack_length + 1 (offset by 1 for input/label split).
+ pad_len = pack_length + 1 - len(tokens_list)
+ if pad_len > 0:
+ tokens_list = tokens_list + [pad] * pad_len
+ assert len(tokens_list) == pack_length + 1
+ input_ids = torch.tensor(tokens_list[:-1], dtype=torch.int64)
+ labels = torch.tensor(tokens_list[1:], dtype=torch.int64)
+ # Position IDs are sequential across the entire sequence including padding,
+ # matching GPTDataset behavior for standard (non-packed) training.
+ position_ids = torch.arange(pack_length, dtype=torch.int64)
+ loss_mask = torch.ones(pack_length, dtype=torch.float32)
+ loss_mask[labels == pad] = 0.0
+ return {
+ 'tokens': input_ids,
+ 'labels': labels,
+ 'loss_mask': loss_mask,
+ 'position_ids': position_ids,
+ }
+
+ # THD format (sequence packing) below.
+ def extend_with_padding(tokens, positions, pad_len):
+ tokens.extend([pad] * pad_len)
+ positions.extend(range(positions[-1] + 1, positions[-1] + 1 + pad_len))
+
+ pack_tokens = list(tokens_list) + [pad]
+ pack_positions = list(range(len(pack_tokens)))
+
+ # Truncate if sequence exceeds pack_length + 1 (need +1 for shift).
+ if len(pack_tokens) > pack_length + 1:
+ pack_tokens = pack_tokens[:pack_length - 1] + [eod, pad]
+ pack_positions = pack_positions[:pack_length + 1]
+
+ # Pad to pad_granularity alignment (tp * cp * 2).
+ # We need final length (after shift) to be divisible by pad_granularity.
+ pad_granularity = self._calculate_padding_divisor()
+ final_len = len(pack_tokens) - 1
+ mod_token_count = final_len % pad_granularity
+ if mod_token_count != 0:
+ pad_len = pad_granularity - mod_token_count
+ extend_with_padding(pack_tokens, pack_positions, pad_len)
+
+ # Apply shift for next-token prediction.
+ input_ids = torch.tensor(pack_tokens[:-1], dtype=torch.int64)
+ labels = torch.tensor(pack_tokens[1:], dtype=torch.int64)
+ position_ids = torch.tensor(pack_positions[:-1], dtype=torch.int64)
+
+ seq_len = len(input_ids)
+ cu_seqlens = [0, seq_len]
+
+ # Loss mask: mask padding tokens
+ loss_mask = torch.ones(seq_len, dtype=torch.float32)
+ loss_mask[labels == pad] = 0.0
+
+ cu_seqlens = torch.tensor(cu_seqlens, dtype=torch.int32)
+ max_seqlen = torch.tensor(seq_len, dtype=torch.int32)
+
+ return {
+ 'tokens': input_ids,
+ 'labels': labels,
+ 'loss_mask': loss_mask,
+ 'position_ids': position_ids,
+ 'cu_seqlens': cu_seqlens,
+ 'max_seqlen': max_seqlen,
+ }
diff --git a/megatron/training/initialize.py b/megatron/training/initialize.py
index 22acd6b6405..c0d8f493b05 100644
--- a/megatron/training/initialize.py
+++ b/megatron/training/initialize.py
@@ -23,7 +23,7 @@
initialize_rerun_state_machine,
)
from megatron.core.transformer.custom_layers.batch_invariant_kernels import enable_batch_invariant_mode
-from megatron.core.utils import get_te_version, is_te_min_version, is_torch_min_version
+from megatron.core.utils import configure_nvtx_profiling, get_te_version, is_te_min_version, is_torch_min_version
from megatron.training import get_adlr_autoresume, get_args, get_tensorboard_writer
from megatron.training.utils import print_rank_0, warn_rank_0
from megatron.training import inprocess_restart
@@ -121,6 +121,12 @@ def state_restore_func(state_dict):
print_rank_0("Enabling batch invariant mode globally")
enable_batch_invariant_mode()
+ # Enable NVTX range profiling when profiling is active.
+ # Must be done before model modules with @nvtx_decorator are imported,
+ # since the decorator captures _nvtx_enabled at decoration (import) time.
+ if args.profile:
+ configure_nvtx_profiling(True)
+
# torch.distributed initialization
def finish_mpu_init():
args = get_args()
@@ -370,7 +376,7 @@ def _initialize_distributed(get_embedding_ranks, get_position_embedding_ranks, s
use_sharp=args.use_sharp,
context_parallel_size=args.context_parallel_size,
hierarchical_context_parallel_sizes=args.hierarchical_context_parallel_sizes,
- hybrid_context_parallel=args.hybrid_context_parallel,
+ dynamic_context_parallel=args.dynamic_context_parallel,
expert_model_parallel_size=args.expert_model_parallel_size,
num_distributed_optimizer_instances=args.num_distributed_optimizer_instances,
expert_tensor_parallel_size=args.expert_tensor_parallel_size,
diff --git a/megatron/training/training.py b/megatron/training/training.py
index 251d754713c..f5530575d58 100644
--- a/megatron/training/training.py
+++ b/megatron/training/training.py
@@ -175,8 +175,11 @@ def set_startup_timestamps(program_start=None, main_entry=None):
from megatron.core.distributed import finalize_model_grads
from megatron.core.enums import ModelType
-from megatron.core.optimizer import get_megatron_optimizer, AdamOptimizerConfig, SGDOptimizerConfig, OptimizerConfig, ParamKey
-from megatron.core.optimizer.muon import get_megatron_muon_optimizer
+from megatron.core.optimizer import (
+ get_megatron_optimizer,
+ OptimizerConfig,
+ ParamKey,
+)
from megatron.core.rerun_state_machine import (
get_rerun_state_machine,
destroy_rerun_state_machine,
@@ -188,10 +191,10 @@ def set_startup_timestamps(program_start=None, main_entry=None):
from megatron.training.initialize import set_jit_fusion_options
from megatron.training.utils import get_batch_on_this_cp_rank, get_batch_on_this_tp_rank, is_hybrid_model
from megatron.training.datasets.data_samplers import build_pretraining_data_loader
-from megatron.core.datasets.data_schedule import HybridCPDataLoaderWrapper
+from megatron.core.datasets.data_schedule import DynamicCPDataLoaderWrapper
from megatron.core.optimizer_param_scheduler import OptimizerParamScheduler
from megatron.core.transformer.moe import upcycling_utils
-from megatron.core.transformer.moe.moe_utils import track_moe_metrics, clear_aux_losses_tracker
+from megatron.core.transformer.moe.moe_logging import get_moe_metrics_tracker
from megatron.core.transformer.experimental_attention_variant.dsa import DSAIndexerLossLoggingHelper
from megatron.core.transformer.multi_token_prediction import MTPLossLoggingHelper
from megatron.core.parallel_state import (
@@ -218,6 +221,7 @@ def set_startup_timestamps(program_start=None, main_entry=None):
get_num_microbatches,
update_num_microbatches
)
+from megatron.core.datasets.data_schedule import wrap_data_iterator
from .async_utils import maybe_finalize_async_save
from .utils import (
@@ -274,45 +278,68 @@ def print_datetime(string, override_timestamp=None):
time_str = datetime.fromtimestamp(override_timestamp).strftime('%Y-%m-%d %H:%M:%S.%f')
print_rank_0(f'[{string}] datetime: {time_str} ')
-def num_floating_point_operations(args, batch_size):
- def mlp_layer_flops(batch_size, seq_len, hidden_size, expansion=4.0, swiglu=False):
+def num_floating_point_operations(args, seqlen_sum_this_global_batch, seqlen_squared_sum_this_global_batch):
+ def calculate_layer_counts():
+ """Calculate the number of attention, Mamba, and MLP layers."""
+ if args.hybrid_override_pattern:
+ from megatron.core.ssm.mamba_hybrid_layer_allocation import parse_hybrid_pattern
+ # Parse unified pattern to separate main and MTP components
+ parsed = parse_hybrid_pattern(args.hybrid_override_pattern)
+ counts = {'M': 0, '*': 0, '-': 0, 'E': 0}
+ # Count main decoder layers
+ if parsed.main_pattern:
+ for layer_type in parsed.main_pattern:
+ if layer_type in counts:
+ counts[layer_type] += 1
+ # Count MTP layers (pattern repeated mtp_num_depths times)
+ if parsed.mtp_pattern and parsed.mtp_num_depths > 0:
+ for layer_type in parsed.mtp_pattern:
+ if layer_type in counts:
+ counts[layer_type] += parsed.mtp_num_depths
+ return counts['*'], counts['M'], counts['-'], counts['E']
+ else:
+ num_attn_layers = round(args.num_layers * args.hybrid_attention_ratio)
+ num_mlp_layers = round(args.num_layers * args.hybrid_mlp_ratio)
+ num_mamba_layers = args.num_layers - num_attn_layers - num_mlp_layers
+ num_moe_layers = 0
+ return num_attn_layers, num_mamba_layers, num_mlp_layers, num_moe_layers
+
+ def mlp_layer_flops(seqlen_sum_this_global_batch, hidden_size, expansion=4.0, swiglu=False):
"""Calculate FLOPs for an MLP layer."""
scale_factor = 3.0 / 2.0 if swiglu else 1.0
- return 4 * expansion * scale_factor * batch_size * seq_len * hidden_size**2
+ return 4 * expansion * scale_factor * seqlen_sum_this_global_batch * hidden_size**2
- def moe_layer_flops(batch_size, seq_len, hidden_size, moe_ffn_hidden_size,
+ def moe_layer_flops(seqlen_sum_this_global_batch, hidden_size, moe_ffn_hidden_size,
shared_expert_ffn_hidden_size, num_experts_routed_to,
moe_latent_size=None, swiglu=False):
"""Calculate FLOPs for an MoE layer."""
scale_factor = 3.0 / 2.0 if swiglu else 1.0
if moe_latent_size is None:
- routed_flops = (4 * batch_size * seq_len * hidden_size *
+ routed_flops = (4 * seqlen_sum_this_global_batch * hidden_size *
moe_ffn_hidden_size * num_experts_routed_to * scale_factor)
else:
# Routed experts run on moe_latent_size.
- routed_flops = (4 * batch_size * seq_len * moe_latent_size *
+ routed_flops = (4 * seqlen_sum_this_global_batch * moe_latent_size *
moe_ffn_hidden_size * num_experts_routed_to * scale_factor)
# Up proj and down proj.
- routed_flops += (4 * batch_size * seq_len * hidden_size * moe_latent_size)
- shared_flops = 4 * batch_size * seq_len * hidden_size * shared_expert_ffn_hidden_size * scale_factor
+ routed_flops += (4 * seqlen_sum_this_global_batch * hidden_size * moe_latent_size)
+ shared_flops = 4 * seqlen_sum_this_global_batch * hidden_size * shared_expert_ffn_hidden_size * scale_factor
return routed_flops + shared_flops
def attn_layer_flops(
- batch_size, seq_len, hidden_size, num_heads, gqa=True, gqa_groups=8, kv_channels=None
+ seqlen_sum_this_global_batch, seqlen_squared_sum_this_global_batch, hidden_size, num_heads, gqa=True, gqa_groups=8, kv_channels=None
):
"""Calculate FLOPs for an attention layer."""
p = (kv_channels * num_heads / hidden_size) if kv_channels else 1
g = gqa_groups if gqa else num_heads
return (
4
- * batch_size
- * seq_len
* hidden_size
* p
- * (hidden_size + (hidden_size * (g / num_heads)) + (seq_len / 2))
+ * (hidden_size * seqlen_sum_this_global_batch + (hidden_size * (g / num_heads)) * seqlen_sum_this_global_batch + (seqlen_squared_sum_this_global_batch / 2))
)
- def mamba_layer_flops(batch_size, seq_len, hidden_size, state_dim=16,
+ def mamba_layer_flops(seqlen_sum_this_global_batch, hidden_size, state_dim=16,
head_dim=64, num_groups=1, num_heads=128):
"""Calculate FLOPs for a Mamba layer."""
# Note (rwaleffe): flops estimate for scan should be updated based on new SSD kernels,
@@ -325,16 +352,15 @@ def mamba_layer_flops(batch_size, seq_len, hidden_size, state_dim=16,
return (
(
2
- * batch_size
- * seq_len
+ * seqlen_sum_this_global_batch
* hidden_size
* (2 * d_in + 2 * num_groups * state_dim + nheads)
) # in_proj
- + (7 * batch_size * seq_len * d_in * state_dim) # scan
- + (2 * batch_size * seq_len * d_in * hidden_size) # out_proj
+ + (7 * seqlen_sum_this_global_batch * d_in * state_dim) # scan
+ + (2 * seqlen_sum_this_global_batch * d_in * hidden_size) # out_proj
)
- def gdn_layer_flops(batch_size, seq_len, hidden_size,
+ def gdn_layer_flops(seqlen_sum_this_global_batch, hidden_size,
qk_head_dim=128, v_head_dim=128,
num_qk_heads=16, num_v_heads=32,
conv_kernel_dim=4):
@@ -342,7 +368,7 @@ def gdn_layer_flops(batch_size, seq_len, hidden_size,
qk_dim = qk_head_dim * num_qk_heads
v_dim = v_head_dim * num_v_heads
return (
- 2 * batch_size * seq_len * (
+ 2 * seqlen_sum_this_global_batch * (
# in_proj: hidden_size -> (2*qk_dim + 2*v_dim + 2*num_v_heads)
hidden_size * (2 * qk_dim + 2 * v_dim + 2 * num_v_heads)
# conv1d
@@ -354,7 +380,7 @@ def gdn_layer_flops(batch_size, seq_len, hidden_size,
)
)
- def hybrid_flops(batch_size, seq_len, hidden_size,
+ def hybrid_flops(seqlen_sum_this_global_batch, seqlen_squared_sum_this_global_batch, hidden_size,
num_attn_layers, num_mamba_layers, num_mlp_layers, num_moe_layers,
num_gdn_layers=0,
mamba_state_dim=128, mamba_head_dim=64,
@@ -370,21 +396,21 @@ def hybrid_flops(batch_size, seq_len, hidden_size,
vocab_size=256000, mtp_num_layers=0):
"""Calculate total FLOPs for the hybrid model."""
flops_fwd = (
- num_attn_layers * attn_layer_flops(batch_size, seq_len, hidden_size,
+ num_attn_layers * attn_layer_flops(seqlen_sum_this_global_batch, seqlen_squared_sum_this_global_batch, hidden_size,
num_attn_heads, gqa, gqa_groups, kv_channels) +
- num_mlp_layers * mlp_layer_flops(batch_size, seq_len, hidden_size,
+ num_mlp_layers * mlp_layer_flops(seqlen_sum_this_global_batch, hidden_size,
mlp_expansion, swiglu) +
- num_mamba_layers * mamba_layer_flops(batch_size, seq_len, hidden_size,
+ num_mamba_layers * mamba_layer_flops(seqlen_sum_this_global_batch, hidden_size,
mamba_state_dim, mamba_head_dim,
mamba_num_groups, mamba_num_heads) +
- num_moe_layers * moe_layer_flops(batch_size, seq_len, hidden_size, moe_ffn_hidden_size,
+ num_moe_layers * moe_layer_flops(seqlen_sum_this_global_batch, hidden_size, moe_ffn_hidden_size,
shared_expert_ffn_hidden_size, num_experts_routed_to,
moe_latent_size, swiglu) +
- num_gdn_layers * gdn_layer_flops(batch_size, seq_len, hidden_size,
+ num_gdn_layers * gdn_layer_flops(seqlen_sum_this_global_batch, hidden_size,
gdn_qk_head_dim, gdn_v_head_dim,
gdn_num_qk_heads, gdn_num_v_heads,
gdn_conv_kernel_dim) +
- (2 * batch_size * seq_len * hidden_size * vocab_size * (1 + mtp_num_layers)) # logits computation
+ (2 * seqlen_sum_this_global_batch * hidden_size * vocab_size * (1 + mtp_num_layers)) # logits computation
)
return flops_fwd * 3
@@ -455,13 +481,18 @@ def transformer_flops():
assert not args.group_query_attention
'''
Basic arithmetic
- let B is batch size, s is seq_len, h is embedding dim,
- for one self_attnetion block (prenorm is not included)
- qkv projection: 6Bsh^2
- attn: 2Bs^2h
- attn over value: 2Bs^2h
- oproj: 2Bsh^2
-
+
+ Let h be the embedding dim.
+ We use two statistics to unify BSHD and THD cases:
+ seqlen_sum_this_global_batch: total number of tokens in this global batch
+ seqlen_squared_sum_this_global_batch: sum of squared sequence lengths in this global batch
+
+ For one self-attention block (prenorm not included):
+ qkv projection: 6 * seqlen_sum_this_global_batch * h^2
+ attn: 2 * seqlen_squared_sum_this_global_batch * h
+ attn over value: 2 * seqlen_squared_sum_this_global_batch * h
+ oproj: 2 * seqlen_sum_this_global_batch * h^2
+
references
https://arxiv.org/abs/2305.10403
https://arxiv.org/abs/2205.05198
@@ -482,7 +513,7 @@ def transformer_flops():
standard_self_attn_term = (
forward_backward_expansion_factor
* fma_expansion_factor
- * (
+ * ( seqlen_sum_this_global_batch * (
## q lora + rope + q norm
q_term
## kv lora + rope + kv norm
@@ -494,12 +525,12 @@ def transformer_flops():
)
+ args.hidden_size * args.qk_pos_emb_head_dim
## o proj
- + (args.num_attention_heads * args.v_head_dim) * args.hidden_size
+ + (args.num_attention_heads * args.v_head_dim) * args.hidden_size)
## core attn
- + args.seq_length
+ + seqlen_squared_sum_this_global_batch
* (args.num_attention_heads * (args.qk_head_dim + args.qk_pos_emb_head_dim))
- / 2 # causal mask (only half of the mask is non-zero)
- + args.seq_length * args.num_attention_heads * args.v_head_dim / 2
+ / 2 # causal mask (only half of the mask is non-zero)
+ + seqlen_squared_sum_this_global_batch * args.num_attention_heads * args.v_head_dim / 2
)
)
@@ -512,7 +543,7 @@ def transformer_flops():
standard_self_attn_term = (
forward_backward_expansion_factor
* fma_expansion_factor
- * (
+ * ( seqlen_sum_this_global_batch *(
## qkv proj
args.hidden_size
* (
@@ -520,14 +551,14 @@ def transformer_flops():
+ key_projection_size
+ value_projection_size
+ gate_projection_size
- )
+ ))
## core attention
+ query_projection_size
- * args.seq_length
+ * seqlen_squared_sum_this_global_batch
/ 2 # causal mask (only half of the mask is non-zero)
* 2 # QK^T and (QK^T)V
## out proj
- + query_projection_size
+ + seqlen_sum_this_global_batch * query_projection_size
* args.hidden_size
)
)
@@ -588,7 +619,7 @@ def transformer_flops():
+ args.hidden_size
* v_dim
)
- )
+ ) * seqlen_sum_this_global_batch
else:
raise ValueError(
"Invalid experimental_attention_variant: "
@@ -605,8 +636,7 @@ def transformer_flops():
)
total_floating_point_operations = (
- batch_size
- * args.seq_length
+ seqlen_sum_this_global_batch
* (
# MLP
forward_backward_expansion_factor
@@ -636,8 +666,6 @@ def transformer_flops():
+ (shared_expert_ffn_hidden_size * ffn_expansion_factor)
* num_moe_layers
)
- # Self Attention
- + self_attn_term
# MTP norms and proj
+ forward_backward_expansion_factor
* fma_expansion_factor
@@ -655,6 +683,10 @@ def transformer_flops():
* args.padded_vocab_size
* (mtp_num_layers + 1) # MTP + final logit
)
+ +
+ # Self Attention
+ self_attn_term
+
)
return total_floating_point_operations
@@ -675,8 +707,8 @@ def transformer_flops():
mtp_num_layers = 0
# Compute hybrid model FLOPs.
return hybrid_flops(
- batch_size=batch_size,
- seq_len=args.seq_length,
+ seqlen_sum_this_global_batch=seqlen_sum_this_global_batch,
+ seqlen_squared_sum_this_global_batch=seqlen_squared_sum_this_global_batch,
hidden_size=args.hidden_size,
num_attn_layers=num_attn_layers,
num_mamba_layers=num_mamba_layers,
@@ -905,7 +937,9 @@ def pretrain(
set_ideal_affinity_for_current_gpu
)
set_ideal_affinity_for_current_gpu()
-
+ if args.batch_invariant_mode:
+ print_rank_0("Enabling batch invariant mode globally", flush=True)
+ enable_batch_invariant_mode()
if args.log_progress:
append_to_progress_log("Starting job")
@@ -1568,23 +1602,11 @@ def get_optimizer_param_scheduler(optimizer):
def get_megatron_optimizer_config(args: Any) -> OptimizerConfig:
"""Return a Megatron optimizer config object from Megatron's arguments."""
- config = None
- if args.optimizer == 'adam' or 'muon' in args.optimizer:
- # TODO(deyuf): Muon needs both adam + muon but get() only receive one config
- # So for now we keep using adam config that's back compat with old way
- kwargs = {}
- for f in dataclasses.fields(AdamOptimizerConfig):
- if hasattr(args, f.name):
- kwargs[f.name] = getattr(args, f.name)
- config = AdamOptimizerConfig(**kwargs)
- elif args.optimizer == 'sgd':
- kwargs = {}
- for f in dataclasses.fields(SGDOptimizerConfig):
- if hasattr(args, f.name):
- kwargs[f.name] = getattr(args, f.name)
- config = SGDOptimizerConfig(**kwargs)
- else:
- raise ValueError("Invalid optimizer type!")
+ kwargs = {}
+ for f in dataclasses.fields(OptimizerConfig):
+ if hasattr(args, f.name):
+ kwargs[f.name] = getattr(args, f.name)
+ config = OptimizerConfig(**kwargs)
# Construct the appropriate config_overrides object. This default handles many cases, but
# can be added to as needed by the user, or replaced entirely with a custom override.
@@ -1633,25 +1655,13 @@ def setup_model_and_optimizer(
if mup_overrides:
config_overrides = {**(config_overrides or {}), **mup_overrides}
- if 'muon' not in config.optimizer:
- # If the user is asking for a non-zero embedding init std, skip weight decay for embeddings
- # to avoid embeddings from shrinking to zero as recommended in https://arxiv.org/abs/2312.16903
- # default_skip_embedding_weight_decay=args.embedding_init_method_std is not None,
- optimizer = get_megatron_optimizer(
- config,
- model,
- config_overrides=config_overrides,
- use_gloo_process_groups=args.use_gloo_process_groups,
- dump_param_to_param_group_map=args.dump_param_to_param_group_map,
- )
- else:
- optimizer = get_megatron_muon_optimizer(
- config,
- model,
- config_overrides=config_overrides,
- use_gloo_process_groups=args.use_gloo_process_groups,
- layer_wise_distributed_optimizer='dist' in config.optimizer,
- )
+ optimizer = get_megatron_optimizer(
+ config,
+ model,
+ config_overrides=config_overrides,
+ use_gloo_process_groups=args.use_gloo_process_groups,
+ dump_param_to_param_group_map=args.dump_param_to_param_group_map,
+ )
opt_param_scheduler = get_optimizer_param_scheduler(optimizer)
one_logger and one_logger.log_metrics({"app_build_optimzer_finish_time": one_logger_utils.get_timestamp_in_ms()})
@@ -1788,6 +1798,12 @@ def train_step(forward_step_func, data_iterator, model, optimizer, opt_param_sch
save_wgrads_in_this_iteration = (args.save_wgrads_interval is not None and
(iteration + 1) % args.save_wgrads_interval == 0)
while rerun_state_machine.should_run_forward_backward(data_iterator):
+ # Offload optimizer states to CPU if enabled.
+ if args.offload_optimizer_states:
+ for optim_instance in optimizer.chained_optimizers:
+ if isinstance(optim_instance, DistributedOptimizer):
+ optim_instance.offload_states()
+
# Set grad to zero.
for model_chunk in model:
model_chunk.zero_grad_buffer()
@@ -1823,6 +1839,35 @@ def train_step(forward_step_func, data_iterator, model, optimizer, opt_param_sch
if isinstance(optim_instance, DistributedOptimizer):
optim_instance._copy_main_params_to_param_buffer()
+ # Release GPU memory for offloaded optimizer states.
+ # This needs to be done after _copy_main_params_to_param_buffer().
+ # Separate offload and release to allow early D2H transfer to overlap with other operations.
+ if args.offload_optimizer_states:
+ for optim_instance in optimizer.chained_optimizers:
+ if isinstance(optim_instance, DistributedOptimizer):
+ optim_instance.release_offloaded_gpu_states()
+
+ if config.sequence_packing_scheduler is not None:
+ # This wrapper is designed to support DP-balanced THD and dynamic-CP.
+ # Before wrapping, the data_iterator returns either a single sequence per get_item call, or a list where each element is a sequence.
+ # The wrapper is responsible for:
+ # 1. scheduling the sequences across ranks
+ # 2. packing them into THD format
+ # 3. broadcast flops parameters and num_microbatches to TP ranks to support unfixed num_microbatches
+ # 4. broadcast metadata (cu_seqlens, cu_seqlens_padded, max_seqlen, etc.) to PP ranks
+ # 5. returning the packed data iterator and the FLOPs parameters
+ (
+ data_iterator,
+ num_microbatches,
+ seqlen_sum_this_global_batch,
+ seqlen_squared_sum_this_global_batch,
+ ) = wrap_data_iterator(data_iterator, config, get_num_microbatches())
+ else:
+ # data_iterator unchanged
+ num_microbatches = get_num_microbatches()
+ seqlen_sum_this_global_batch = args.seq_length * args.global_batch_size
+ seqlen_squared_sum_this_global_batch = args.seq_length ** 2 * args.global_batch_size
+
# Forward pass.
if save_dgrads_in_this_iteration:
enable_dgrad_logging(model, args.save)
@@ -1830,7 +1875,7 @@ def train_step(forward_step_func, data_iterator, model, optimizer, opt_param_sch
forward_step_func=forward_step_func,
data_iterator=data_iterator,
model=model,
- num_microbatches=get_num_microbatches(),
+ num_microbatches=num_microbatches,
seq_length=args.seq_length,
micro_batch_size=args.micro_batch_size,
decoder_seq_length=args.decoder_seq_length,
@@ -1863,7 +1908,7 @@ def train_step(forward_step_func, data_iterator, model, optimizer, opt_param_sch
should_checkpoint, should_exit, exit_code = rerun_state_machine.should_checkpoint_and_exit()
if should_exit:
- return {}, True, should_checkpoint, should_exit, exit_code, None, None, 0
+ return {}, True, should_checkpoint, should_exit, exit_code, None, None, 0, seqlen_sum_this_global_batch, seqlen_squared_sum_this_global_batch
# Empty unused memory.
if args.empty_unused_memory_level >= 1:
@@ -1943,8 +1988,10 @@ def train_step(forward_step_func, data_iterator, model, optimizer, opt_param_sch
grad_norm,
num_zeros_in_grad,
log_max_attention_logit,
+ seqlen_sum_this_global_batch,
+ seqlen_squared_sum_this_global_batch,
)
- return {}, skipped_iter, should_checkpoint, should_exit, exit_code, grad_norm, num_zeros_in_grad, log_max_attention_logit
+ return {}, skipped_iter, should_checkpoint, should_exit, exit_code, grad_norm, num_zeros_in_grad, log_max_attention_logit, seqlen_sum_this_global_batch, seqlen_squared_sum_this_global_batch
def training_log(
@@ -1959,6 +2006,8 @@ def training_log(
params_norm,
num_zeros_in_grad,
max_attention_logit,
+ seqlen_sum_this_global_batch,
+ seqlen_squared_sum_this_global_batch,
pg_collection=None,
is_first_iteration=False,
):
@@ -2122,8 +2171,8 @@ def training_log(
writer.add_scalar('max_attention_logit', max_attention_logit, iteration)
if wandb_writer:
wandb_writer.log({'max_attention_logit': max_attention_logit}, iteration)
-
# Log MoE metrics.
+ moe_log_string = ""
if args.num_experts is not None:
moe_loss_scale = 1 / get_num_microbatches()
track_names = []
@@ -2146,12 +2195,11 @@ def training_log(
else:
layers = args.num_layers
- track_moe_metrics(
+ moe_log_string = get_moe_metrics_tracker().report(
loss_scale=moe_loss_scale,
iteration=iteration,
writer=writer,
wandb_writer=wandb_writer,
- total_loss_dict=total_loss_dict,
per_layer_logging=args.moe_per_layer_logging,
force_initialize=True,
track_names=track_names,
@@ -2159,6 +2207,7 @@ def training_log(
moe_layer_freq=args.moe_layer_freq,
mtp_num_layers=args.mtp_num_layers,
pg_collection=pg_collection,
+ total_loss_dict=total_loss_dict,
)
# Log MTP metrics.
@@ -2167,7 +2216,6 @@ def training_log(
MTPLossLoggingHelper.track_mtp_metrics(
mtp_loss_scale, iteration, writer, wandb_writer, total_loss_dict
)
-
# Track sparse attention indexer loss.
if args.dsa_indexer_loss_coeff is not None and args.dsa_indexer_loss_coeff > 0:
indexer_loss_scale = 1 / get_num_microbatches()
@@ -2178,7 +2226,6 @@ def training_log(
wandb_writer=wandb_writer,
total_loss_dict=total_loss_dict,
)
-
# Dump memory snapshot and print metrics to stdout.
if iteration % args.log_interval == 0 or is_first_iteration:
if args.record_memory_history and (is_last_rank() or torch.distributed.get_backend() == 'fake'):
@@ -2191,7 +2238,7 @@ def training_log(
elapsed_time = timers('interval-time').elapsed(barrier=True, reset=should_reset)
elapsed_time_per_iteration = elapsed_time / total_iterations
- throughput = num_floating_point_operations(args, batch_size) / (
+ throughput = num_floating_point_operations(args,seqlen_sum_this_global_batch, seqlen_squared_sum_this_global_batch) / (
elapsed_time_per_iteration * 10**12 * args.world_size
)
@@ -2246,6 +2293,8 @@ def training_log(
log_string += ' {}: {:.6E} |'.format(key, avg)
if should_reset:
total_loss_dict[key] = torch.tensor([0.0], dtype=torch.float, device='cuda')
+ if args.num_experts is not None and moe_log_string:
+ log_string += moe_log_string
log_string += f' loss scale: {loss_scale:.1f} |'
if grad_norm is not None:
log_string += f' grad norm: {grad_norm:.3f} |'
@@ -2361,7 +2410,8 @@ def save_checkpoint_and_time(
# Stop timer to get accurate train interval time and exclude checkpointing duration
timers('interval-time').stop()
- energy_monitor.pause()
+ if args.log_energy:
+ energy_monitor.pause()
# Extra barrier is added to make sure all ranks report the max time.
timer_key = 'save-checkpoint-non-persistent' if non_persistent_ckpt else 'save-checkpoint'
@@ -2419,7 +2469,9 @@ def save_checkpoint_and_time(
)
# Recover timing
- energy_monitor.resume()
+ if args.log_energy:
+ energy_monitor.resume()
+
timers('interval-time', log_level=0).start(barrier=True)
@@ -2688,8 +2740,8 @@ def train(
energy_monitor = get_energy_monitor()
one_logger = get_one_logger()
- if args.hybrid_context_parallel:
- train_data_iterator = iter(HybridCPDataLoaderWrapper(train_data_iterator, config))
+ if args.dynamic_context_parallel:
+ train_data_iterator = iter(DynamicCPDataLoaderWrapper(train_data_iterator, config))
if args.run_workload_inspector_server:
try:
@@ -2757,7 +2809,21 @@ def train(
config.param_sync_func = [model_chunk.start_param_sync for model_chunk in model]
if len(model) == 1:
config.param_sync_func = config.param_sync_func[0]
- config.finalize_model_grads_func = finalize_model_grads
+
+ # Wrap finalize_model_grads to reload offloaded optimizer states before grad finalization.
+ # This allows H2D transfer to overlap with grad all-reduce.
+ if args.offload_optimizer_states:
+
+ def finalize_model_grads_with_state_reload(*fmg_args, **fmg_kwargs):
+ # Reload offloaded states for all DistributedOptimizer instances
+ for optim_instance in optimizer.chained_optimizers:
+ if isinstance(optim_instance, DistributedOptimizer):
+ optim_instance.reload_offloaded_states()
+ return finalize_model_grads(*fmg_args, **fmg_kwargs)
+
+ config.finalize_model_grads_func = finalize_model_grads_with_state_reload
+ else:
+ config.finalize_model_grads_func = finalize_model_grads
if args.log_energy:
energy_monitor.setup()
@@ -3004,6 +3070,8 @@ def trace_handler(p):
grad_norm = 0.0
num_zeros_in_grad = 0
max_attention_logit = None
+ seqlen_sum_this_global_batch = 0
+ seqlen_squared_sum_this_global_batch = 0
else:
ft_integration.on_training_step_start()
(
@@ -3015,6 +3083,8 @@ def trace_handler(p):
grad_norm,
num_zeros_in_grad,
max_attention_logit,
+ seqlen_sum_this_global_batch,
+ seqlen_squared_sum_this_global_batch,
) = train_step(
forward_step_func, train_data_iterator, model, optimizer, opt_param_scheduler, config, forward_backward_func, iteration=iteration
)
@@ -3102,7 +3172,7 @@ def trace_handler(p):
else:
assert num_skipped_samples_in_batch == 0
args.skipped_train_samples += num_skipped_samples_in_batch
- num_floating_point_operations_in_batch = num_floating_point_operations(args, batch_size)
+ num_floating_point_operations_in_batch = num_floating_point_operations(args, seqlen_sum_this_global_batch, seqlen_squared_sum_this_global_batch)
num_floating_point_operations_so_far += num_floating_point_operations_in_batch
num_floating_point_operations_since_last_log_event += num_floating_point_operations_in_batch
@@ -3131,6 +3201,8 @@ def trace_handler(p):
params_norm,
num_zeros_in_grad,
max_attention_logit,
+ seqlen_sum_this_global_batch,
+ seqlen_squared_sum_this_global_batch,
pg_collection=model_pg_collection,
is_first_iteration=is_first_iteration,
)
@@ -3192,7 +3264,7 @@ def trace_handler(p):
if args.log_energy:
energy_monitor.resume()
if args.num_experts is not None:
- clear_aux_losses_tracker()
+ get_moe_metrics_tracker().clear()
# Miscellaneous post-training-step functions (e.g., FT heartbeats, GC).
# Some of these only happen at specific iterations. Capture updated FLOPs accumulator
@@ -3326,9 +3398,30 @@ def evaluate(
# Don't care about timing during evaluation
config.timers = None
ft_integration.on_eval_step_start()
+ if config.sequence_packing_scheduler is not None:
+ # This wrapper is designed to support DP-balanced THD and dynamic-CP.
+ # Before wrapping, the data_iterator returns either a single sequence per get_item call, or a list where each element is a sequence.
+ # The wrapper is responsible for:
+ # 1. scheduling the sequences across ranks
+ # 2. packing them into THD format
+ # 3. broadcast flops parameters and num_microbatches to TP ranks to support unfixed num_microbatches
+ # 4. broadcast metadata (cu_seqlens, cu_seqlens_padded, max_seqlen, etc.) to PP ranks
+ # 5. returning the packed data iterator and the FLOPs parameters
+ try:
+ (
+ packed_data_iterator,
+ eval_num_microbatches,
+ _,
+ _,
+ ) = wrap_data_iterator(data_iterator, config, eval_num_microbatches)
+ except StopIteration:
+ # Validation data iterator exhausted, stop evaluation early.
+ break
+ else:
+ packed_data_iterator = data_iterator
loss_dicts = forward_backward_func(
forward_step_func=forward_step_func,
- data_iterator=data_iterator,
+ data_iterator=packed_data_iterator,
model=model,
num_microbatches=eval_num_microbatches,
seq_length=args.seq_length,
@@ -3558,18 +3651,20 @@ def get_train_valid_test_num_samples():
return (train_samples_in_current_phase, eval_samples, test_samples)
-def build_train_valid_test_datasets(build_train_valid_test_datasets_provider, train_valid_test_num_samples=None):
+def build_train_valid_test_datasets(build_train_valid_test_datasets_provider, train_valid_test_num_samples=None, vp_stage=None):
"""Build pretraining datasets."""
if train_valid_test_num_samples is None:
train_valid_test_num_samples = get_train_valid_test_num_samples()
- print_rank_0(' > datasets target sizes (minimum size):')
print_rank_0(' train: {}'.format(train_valid_test_num_samples[0]))
print_rank_0(' validation: {}'.format(train_valid_test_num_samples[1]))
print_rank_0(' test: {}'.format(train_valid_test_num_samples[2]))
- return build_train_valid_test_datasets_provider(train_valid_test_num_samples)
+ if vp_stage is not None:
+ return build_train_valid_test_datasets_provider(train_valid_test_num_samples, vp_stage=vp_stage)
+ else:
+ return build_train_valid_test_datasets_provider(train_valid_test_num_samples)
-def build_train_valid_test_data_loaders(build_train_valid_test_datasets_provider):
+def build_train_valid_test_data_loaders(build_train_valid_test_datasets_provider, vp_stage=None):
"""Build pretraining data loaders."""
args = get_args()
@@ -3616,7 +3711,10 @@ def build_train_valid_test_data_loaders(build_train_valid_test_datasets_provider
else:
# Build datasets.
- train_ds, valid_ds, test_ds = build_train_valid_test_datasets(build_train_valid_test_datasets_provider)
+ train_ds, valid_ds, test_ds = build_train_valid_test_datasets(
+ build_train_valid_test_datasets_provider,
+ vp_stage=vp_stage,
+ )
valid_ds = [valid_ds] if not isinstance(valid_ds, list) else valid_ds
if args.skip_train:
train_dataloader = None
@@ -3653,14 +3751,15 @@ def build_train_valid_test_data_loaders(build_train_valid_test_datasets_provider
return train_dataloader, valid_dataloaders, test_dataloader
-def build_train_valid_test_data_iterators(build_train_valid_test_datasets_provider):
+def build_train_valid_test_data_iterators(build_train_valid_test_datasets_provider, vp_stage=None):
"""Build pretraining data iterators."""
args = get_args()
# Build loaders.
train_dataloader, valid_dataloaders, test_dataloader = build_train_valid_test_data_loaders(
- build_train_valid_test_datasets_provider
+ build_train_valid_test_datasets_provider,
+ vp_stage=vp_stage
)
# Build iterators.
diff --git a/megatron/training/utils.py b/megatron/training/utils.py
index 843f3d74ec9..ba470f165ec 100644
--- a/megatron/training/utils.py
+++ b/megatron/training/utils.py
@@ -39,6 +39,7 @@
from megatron.core.utils import (
get_batch_on_this_cp_rank,
get_data_parallel_group_if_dtensor,
+ is_torch_min_version,
to_local_if_dtensor,
unwrap_model,
)
@@ -286,7 +287,8 @@ def report_memory(name):
string += f" | max allocated: {torch.cuda.max_memory_allocated() / mega_bytes:.2f}"
string += f" | reserved: {torch.cuda.memory_reserved() / mega_bytes:.2f}"
string += f" | max reserved: {torch.cuda.max_memory_reserved() / mega_bytes:.2f}"
- if args.log_device_memory_used:
+ if args.log_device_memory_used and is_torch_min_version("2.6.0"):
+ # device usage is not supported in torch < 2.6.0
string += f" | total device memory used: {torch.cuda.device_memory_used() / mega_bytes:.2f}"
if mpu.get_data_parallel_rank() == 0:
print("[Rank {}] {}".format(torch.distributed.get_rank(), string), flush=True)
@@ -578,7 +580,7 @@ def _broadcast_cu_seqlens(cu_seqlens):
buf = cu_seqlens.to(device=dev, non_blocking=True).contiguous()
_broadcast(buf)
- if args.hybrid_context_parallel:
+ if args.dynamic_context_parallel:
seq_len = torch.tensor(batch['tokens'].shape[0], dtype=torch.int32, device=torch.cuda.current_device())
_broadcast(seq_len)
@@ -608,7 +610,7 @@ def _broadcast_cu_seqlens(cu_seqlens):
_broadcast(batch['attention_mask'])
else:
- if args.hybrid_context_parallel:
+ if args.dynamic_context_parallel:
seq_len = torch.tensor(0, dtype=torch.int32, device=torch.cuda.current_device())
_broadcast(seq_len)
shape = (seq_len.item())
@@ -631,7 +633,7 @@ def _broadcast_cu_seqlens(cu_seqlens):
device=torch.cuda.current_device(),
)
if args.create_attention_mask_in_dataloader:
- shape_attention_mask = (args.micro_batch_size, 1, args.seq_length, args.seq_length) if not args.hybrid_context_parallel else (1, 1, shape[0], shape[0])
+ shape_attention_mask = (args.micro_batch_size, 1, args.seq_length, args.seq_length) if not args.dynamic_context_parallel else (1, 1, shape[0], shape[0])
attention_mask = torch.empty(
shape_attention_mask,
dtype=torch.bool,
@@ -645,7 +647,7 @@ def _broadcast_cu_seqlens(cu_seqlens):
device=torch.cuda.current_device(),
)
cu_seqlens = None
- if args.hybrid_context_parallel or args.sft:
+ if args.dynamic_context_parallel or args.sft:
max_seqlen = torch.empty(
1,
dtype=torch.int32,
@@ -658,7 +660,7 @@ def _broadcast_cu_seqlens(cu_seqlens):
1,
dtype=torch.int32,
device=torch.cuda.current_device(),
- ) if args.hybrid_context_parallel else None
+ ) if args.dynamic_context_parallel else None
def _broadcast_cu_seqlens():
dev = torch.cuda.current_device()
diff --git a/pretrain_gpt.py b/pretrain_gpt.py
index 31eee0f4dc6..dfb3afd2f95 100644
--- a/pretrain_gpt.py
+++ b/pretrain_gpt.py
@@ -25,12 +25,13 @@
from megatron.core import parallel_state
from megatron.core.datasets.blended_megatron_dataset_builder import BlendedMegatronDatasetBuilder
from megatron.core.datasets.gpt_dataset import GPTDataset, GPTDatasetConfig, MockGPTDataset
+from megatron.core.datasets.data_schedule import get_batch_on_this_rank_for_sequence_packing
from megatron.core.enums import ModelType
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.models.gpt import GPTModel
from megatron.core.rerun_state_machine import get_rerun_state_machine
from megatron.core.tokenizers.utils.build_tokenizer import build_tokenizer
-from megatron.core.utils import get_attr_wrapped_model, get_thd_batch_on_this_cp_rank, get_batch_on_this_hybrid_cp_rank, StragglerDetector
+from megatron.core.utils import get_attr_wrapped_model, get_thd_batch_on_this_cp_rank, get_batch_on_this_dynamic_cp_rank, StragglerDetector
from megatron.training import (
get_args,
get_timers,
@@ -39,6 +40,7 @@
print_rank_0,
set_startup_timestamps,
)
+from megatron.training.arguments import core_transformer_config_from_args
from megatron.training.datasets.sft_dataset import SFTDataset
from megatron.core.transformer.multi_token_prediction import mtp_on_this_rank, get_mtp_ranks
from megatron.training.arguments import core_transformer_config_from_args
@@ -49,6 +51,7 @@
get_blend_and_blend_per_split,
is_first_or_last_pipeline_stage,
)
+from megatron.training.datasets.sft_dataset import SFTDataset, MockSFTDataset
from model_provider import model_provider
try:
@@ -114,6 +117,15 @@ def get_batch(data_iterator, vp_stage: Optional[int] = None):
"""
args = get_args()
config = core_transformer_config_from_args(args)
+
+ if args.sequence_packing_scheduler is not None:
+ return get_batch_on_this_rank_for_sequence_packing(
+ data_iterator,
+ vpp_size=config.virtual_pipeline_model_parallel_size,
+ mtp_on_this_rank=mtp_on_this_rank(config, ignore_virtual=False, vp_stage=vp_stage),
+ vp_stage=vp_stage,
+ )
+
# TODO: this is pretty hacky, find a better way
is_packed_sequence = get_args().sft # SFT always uses packed sequence
if not is_first_or_last_pipeline_stage(vp_stage) and not is_packed_sequence and (
@@ -157,9 +169,9 @@ def get_batch(data_iterator, vp_stage: Optional[int] = None):
packed_seq_params = None
elif local_cp_size is None: # Packed THD format
batch, packed_seq_params = get_thd_batch_on_this_cp_rank(batch, cu_seqlens, cu_seqlens_padded, max_seqlen)
- else: # Hybrid CP format
- batch, packed_seq_params = get_batch_on_this_hybrid_cp_rank(batch, local_cp_size)
-
+ else: # Dynamic CP format
+ batch, packed_seq_params = get_batch_on_this_dynamic_cp_rank(batch, local_cp_size)
+
return (*batch.values(), packed_seq_params)
@@ -319,7 +331,8 @@ def core_gpt_dataset_config_from_args(args):
"context_parallel_size": args.context_parallel_size,
"data_parallel_size": args.data_parallel_size,
"sequence_parallel_size": args.tensor_model_parallel_size*args.sequence_parallel,
- "hybrid_context_parallel": args.hybrid_context_parallel,
+ "dynamic_context_parallel": args.dynamic_context_parallel,
+ "sft_mock_dataset_config_json":args.sft_mock_dataset_config_json,
}
# add FIM args to the config
@@ -359,7 +372,10 @@ def train_valid_test_datasets_provider(train_val_test_num_samples, vp_stage=None
is_packed_sequence = False
if args.sft:
- dataset_type = SFTDataset
+ if args.mock_data:
+ dataset_type = MockSFTDataset
+ else:
+ dataset_type = SFTDataset
is_packed_sequence = True # SFT always uses packed sequence
else:
if args.mock_data:
diff --git a/pretrain_mamba.py b/pretrain_mamba.py
index a30979af714..048d40f82a5 100644
--- a/pretrain_mamba.py
+++ b/pretrain_mamba.py
@@ -94,7 +94,7 @@ def get_batch(data_iterator, vp_stage=None):
cu_seqlens = batch['cu_seqlens']
# Unused at the moment
cu_seqlens_padded = batch.pop('cu_seqlens_padded', None)
- # Support for Hybrid Context Parallel (Unused in this script)
+ # Support for Dynamic Context Parallel (Unused in this script)
local_cp_size = batch.pop('local_cp_size', None)
if cu_seqlens is not None:
diff --git a/pyproject.toml b/pyproject.toml
index 27a94cff130..a9889eb8c00 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,12 +8,7 @@ build-backend = "setuptools.build_meta"
include-package-data = true
[tool.setuptools.packages.find]
-include = [
- "megatron.core",
- "megatron.core.*",
- "megatron.training",
- "megatron.training.*",
-]
+include = ["megatron.core", "megatron.core.*"]
[tool.setuptools.dynamic]
version = { attr = "megatron.core.package_info.__version__" }
@@ -68,16 +63,6 @@ Download = "https://github.com/NVIDIA/Megatron-LM/releases"
Homepage = "https://github.com/NVIDIA/Megatron-LM"
[project.optional-dependencies]
-training = [
- "flask-restful",
- "sentencepiece",
- "tiktoken",
- "wandb",
- "transformers",
- "accelerate",
-]
-
-### 'mlm' group is deprecated. please use 'training' instead ###
mlm = [
"flask-restful",
"sentencepiece",
@@ -89,7 +74,7 @@ mlm = [
dev = [
"nvidia-modelopt[torch]; sys_platform != 'darwin'",
- "transformer-engine[pytorch,core_cu13]",
+ "transformer-engine[pytorch,core_cu13]>=2.9.0a0,<2.12.0",
"nvidia-resiliency-ext",
"tqdm",
"einops~=0.8",
@@ -100,18 +85,18 @@ dev = [
"mamba-ssm~=2.2",
"causal-conv1d~=1.5",
"flash-linear-attention~=0.4.0",
+ "nv-grouped-gemm~=1.1",
"megatron-energon[av_decode]~=6.0",
"av",
"flashinfer-python~=0.5.0",
"wget",
"onnxscript",
- "fastapi~=0.50", # Forcing a little bit more recent version of fastapi to be compatible with pydantic 2.0
+ "fastapi~=0.50", # Forcing a little bit more recent version of fastapi to be compatible with pydantic 2.0
"datasets",
- "emerging_optimizers",
- "hypercorn",
+ "emerging_optimizers; python_version >= '3.12'",
"quart",
+ "hypercorn",
"openai[aiohttp]",
- "orjson",
]
lts = [
@@ -123,6 +108,7 @@ lts = [
"opentelemetry-api~=1.33.1",
"mamba-ssm~=2.2",
"causal-conv1d~=1.5",
+ "nv-grouped-gemm~=1.1",
"megatron-energon[av_decode]~=6.0",
"av",
"flashinfer-python~=0.5.0",
@@ -130,7 +116,7 @@ lts = [
"onnxscript",
"fastapi~=0.50", # Forcing a little bit more recent version of fastapi to be compatible with pydantic 2.0
"datasets",
- "emerging_optimizers",
+ "emerging_optimizers; python_version >= '3.12'",
]
[dependency-groups]
@@ -140,7 +126,6 @@ test = [
"wrapt",
"pytest==8.3.5",
"pytest-mock",
- "mock",
"pytest-cov",
"pytest-random-order",
"pytest-asyncio",
@@ -175,16 +160,17 @@ linting = [
"pylint==3.2.6",
]
ci = ["python-gitlab", "slack-sdk", "pandas"]
-no_pypi_wheels = ["flash_mla", "emerging_optimizers"]
+no_pypi_wheels = ["emerging_optimizers; python_version >= '3.12'", "fast-hadamard-transform"]
[tool.uv]
default-groups = ["linting", "build", "test"]
no-build-isolation-package = [
"causal-conv1d",
- "flash_mla",
+ "nv-grouped-gemm",
"mamba-ssm",
"transformer-engine",
"transformer-engine-torch",
+ "fast-hadamard-transform",
]
link-mode = "copy"
conflicts = [[{ extra = "lts" }, { extra = "dev" }]]
@@ -202,10 +188,10 @@ override-dependencies = [
flash_mla = [
{ git = "https://github.com/deepseek-ai/FlashMLA", rev = "9edee0c022cd0938148a18e334203b0aab43aa19" },
]
-transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "287770466f0f4433052260a765db5ff7b8be1320" }
+transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "5671fd3675906cda1ade26c24a65d3dedd88eb89" }
nemo-run = { git = "https://github.com/NVIDIA-NeMo/Run.git", rev = "01a9a8ba360f7b2908728ad0516e0ad9d936966d" }
-emerging_optimizers = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git", rev = "v0.1.0" }
-nvidia-resiliency-ext = { git = "https://github.com/NVIDIA/nvidia-resiliency-ext.git", rev = "v0.5.0" }
+emerging_optimizers = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git", rev = "v0.2.0" }
+fast-hadamard-transform = { git = "https://github.com/Dao-AILab/fast-hadamard-transform.git", rev = "f134af63deb2df17e1171a9ec1ea4a7d8604d5ca" }
[tool.isort]
profile = "black" # black-compatible
@@ -247,10 +233,8 @@ markers = [
concurrency = ["thread", "multiprocessing"]
omit = [
"/tmp/*",
- "/opt/megatron-lm/tests/*",
- "/opt/megatron-lm/tools/*",
+ "/workspace/tests/*",
"/usr/local/lib/python3.12/dist-packages/*",
- "/opt/megatron-lm/_remote_module_non_scriptable",
]
parallel = true
sigterm = false
diff --git a/tests/functional_tests/shell_test_utils/run_ci_test.sh b/tests/functional_tests/shell_test_utils/run_ci_test.sh
index 3d47e591749..fa3ed2f4db9 100644
--- a/tests/functional_tests/shell_test_utils/run_ci_test.sh
+++ b/tests/functional_tests/shell_test_utils/run_ci_test.sh
@@ -149,6 +149,10 @@ for i in $(seq 1 $N_REPEAT); do
# First run never loads from a checkpoint
export RUN_NUMBER=1
+ DIR=$(dirname "$_TENSORBOARD_PATH")
+ FILE=$(basename "$_TENSORBOARD_PATH")
+ export TENSORBOARD_PATH=$DIR/$i/$FILE
+ mkdir -p $(dirname $TENSORBOARD_PATH)
export REPEAT=$i
export CHECKPOINT_SAVE_PATH=$_CHECKPOINT_SAVE_PATH
export TRAINING_EXIT_CODE=0
diff --git a/tests/functional_tests/test_cases/ci_base_config.yml b/tests/functional_tests/test_cases/ci_base_config.yml
new file mode 100644
index 00000000000..739f343da9d
--- /dev/null
+++ b/tests/functional_tests/test_cases/ci_base_config.yml
@@ -0,0 +1,14 @@
+MODEL_ARGS:
+ # Add logging args
+ --log-timers-to-tensorboard: true
+ --log-memory-to-tensorboard: true
+ --log-num-zeros-in-grad: true
+ --log-params-norm: true
+ --log-validation-ppl-to-tensorboard: true
+ --log-throughput: true
+ --log-interval: 1
+ --logging-level: 40
+ --tensorboard-dir: ${TENSORBOARD_PATH}
+ # Add checkpointing args
+ --save: ${CHECKPOINT_SAVE_PATH}
+ --load: ${CHECKPOINT_LOAD_PATH}
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release_sm/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release_sm/model_config.yaml
index 3bde7ac0d2d..008685c4b3d 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release_sm/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release_sm/model_config.yaml
@@ -23,7 +23,7 @@ MODEL_ARGS:
--micro-batch-size: 4
--rampup-batch-size: "[384 384 97656250]"
--global-batch-size: 1152
- --train-samples: 4882812
+ --train-samples: 19531250
--manual-gc: true
# Transformer Engine args
--transformer-impl: transformer_engine
@@ -68,7 +68,6 @@ MODEL_ARGS:
--eval-iters: 32
--eval-interval: 2000
# Add checkpointing args
- --load: ${CHECKPOINT_LOAD_PATH}
--save: ${CHECKPOINT_SAVE_PATH}
--save-interval: 1000
--save-retain-interval: 5000
@@ -88,7 +87,7 @@ MODEL_ARGS:
--wandb-exp-name: ${WANDB_EXPERIMENT}
# Add mixed precision args
--bf16: true
- --exit-interval: 13000
+ --exit-interval: 10200
--wandb-save-dir: ${WANDB_SAVE_PATH}
--async-save: true
--use-persistent-ckpt-worker: true
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgxa100_dracooci.json
index 1ba701443ce..7fa302274bf 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgxa100_dracooci.json
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgxa100_dracooci.json
@@ -534,4 +534,4 @@
"100": 0.16898
}
}
-}
\ No newline at end of file
+}
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgxa100_dracooci.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgxa100_dracooci.json
index d8ec5426bd1..363e94d8f52 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgxa100_dracooci.json
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgxa100_dracooci.json
@@ -284,4 +284,4 @@
"50": 0.16165
}
}
-}
\ No newline at end of file
+}
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_dsa/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_dsa/model_config.yaml
new file mode 100644
index 00000000000..63a0933313c
--- /dev/null
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_dsa/model_config.yaml
@@ -0,0 +1,66 @@
+ENV_VARS:
+ CUDA_DEVICE_MAX_CONNECTIONS: 1
+ NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
+ NCCL_ALGO: Ring
+ CUBLAS_WORKSPACE_CONFIG: :4096:8
+ ENABLE_LIGHTWEIGHT_MODE: true
+MODEL_ARGS:
+ --num-layers: 4
+ --hidden-size: 512
+ --num-attention-heads: 8
+ --multi-latent-attention: true
+ --q-lora-rank: 192
+ --kv-lora-rank: 64
+ --qk-head-dim: 16
+ --qk-pos-emb-head-dim: 8
+ --v-head-dim: 16
+ --experimental-attention-variant: dsa
+ --dsa-indexer-n-heads: 64
+ --dsa-indexer-head-dim: 128
+ --dsa-indexer-topk: 2048
+ --dsa-indexer-loss-coeff: 0.01
+ --attention-backend: fused
+ --log-params-norm: true
+ --log-num-zeros-in-grad: true
+ --log-validation-ppl-to-tensorboard: true
+ --log-timers-to-tensorboard: true
+ --tensorboard-dir: ${TENSORBOARD_PATH}
+ --micro-batch-size: 4
+ --global-batch-size: 32
+ --seq-length: 1024
+ --max-position-embeddings: 1024
+ --train-iters: 50
+ --timing-log-level: 0
+ --lr-decay-iters: 320000
+ --save: ${CHECKPOINT_SAVE_PATH}
+ --load: ${CHECKPOINT_LOAD_PATH}
+ --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
+ --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
+ --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
+ --split: 949,50,1
+ --distributed-backend: nccl
+ --lr: 0.00015
+ --lr-decay-style: cosine
+ --min-lr: 1.0e-5
+ --weight-decay: 1e-2
+ --clip-grad: 1.0
+ --lr-warmup-fraction: .01
+ --log-interval: 1
+ --save-interval: 25
+ --eval-interval: 1000
+ --eval-iters: 10
+ --transformer-impl: transformer_engine
+ --tensor-model-parallel-size: 2
+ --pipeline-model-parallel-size: 2
+ --sequence-parallel: true
+ --untie-embeddings-and-output-weights: true
+ --deterministic-mode: true
+ --no-gradient-accumulation-fusion: true
+ --attention-softmax-in-fp32: true
+ --use-mcore-models: true
+ --ckpt-format: torch_dist
+ --data-cache-path: ${DATA_CACHE_PATH}
+ --bf16: true
+ --attention-backend: unfused
+ --log-memory-to-tensorboard: true
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/golden_values_dev_dgx_h100.json
new file mode 100644
index 00000000000..40c4236aaba
--- /dev/null
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/golden_values_dev_dgx_h100.json
@@ -0,0 +1,287 @@
+{
+ "lm loss": {
+ "start_step": 1,
+ "end_step": 50,
+ "step_interval": 1,
+ "values": {
+ "1": 10.86149,
+ "2": 10.85467,
+ "3": 10.86695,
+ "4": 10.84622,
+ "5": 10.88467,
+ "6": 10.89675,
+ "7": 10.87274,
+ "8": 10.86587,
+ "9": 10.86993,
+ "10": 10.83755,
+ "11": 10.89458,
+ "12": 10.87951,
+ "13": 10.87683,
+ "14": 10.90359,
+ "15": 10.8311,
+ "16": 10.83451,
+ "17": 10.8006,
+ "18": 10.82062,
+ "19": 10.81464,
+ "20": 10.71811,
+ "21": 10.68628,
+ "22": 10.532,
+ "23": 10.70484,
+ "24": 10.58546,
+ "25": 10.51896,
+ "26": 10.58493,
+ "27": 10.60104,
+ "28": 10.53535,
+ "29": 10.57113,
+ "30": 10.33245,
+ "31": 10.05828,
+ "32": 10.42782,
+ "33": 10.42024,
+ "34": 10.16984,
+ "35": 10.23069,
+ "36": 10.18748,
+ "37": 10.31248,
+ "38": 10.1421,
+ "39": 10.38137,
+ "40": 10.04848,
+ "41": 10.10328,
+ "42": 10.17152,
+ "43": 9.78294,
+ "44": 9.90964,
+ "45": 9.785,
+ "46": 9.7688,
+ "47": 10.10084,
+ "48": 9.80968,
+ "49": 9.48778,
+ "50": 9.8671
+ }
+ },
+ "num-zeros": {
+ "start_step": 1,
+ "end_step": 50,
+ "step_interval": 1,
+ "values": {
+ "1": 1732.0,
+ "2": 34586.0,
+ "3": 1628.0,
+ "4": 1806.0,
+ "5": 1834.0,
+ "6": 1858.0,
+ "7": 1772.0,
+ "8": 1665.0,
+ "9": 34627.0,
+ "10": 1456.0,
+ "11": 34535.0,
+ "12": 34448.0,
+ "13": 34667.0,
+ "14": 1796.0,
+ "15": 1927.0,
+ "16": 1877.0,
+ "17": 34649.0,
+ "18": 34420.0,
+ "19": 1769.0,
+ "20": 1649.0,
+ "21": 34642.0,
+ "22": 34433.0,
+ "23": 34799.0,
+ "24": 1646.0,
+ "25": 34511.0,
+ "26": 34458.0,
+ "27": 34560.0,
+ "28": 2009.0,
+ "29": 34850.0,
+ "30": 1856.0,
+ "31": 34387.0,
+ "32": 34646.0,
+ "33": 34964.0,
+ "34": 1977.0,
+ "35": 34773.0,
+ "36": 34665.0,
+ "37": 2428.0,
+ "38": 35045.0,
+ "39": 35161.0,
+ "40": 2201.0,
+ "41": 35100.0,
+ "42": 2389.0,
+ "43": 34872.0,
+ "44": 34922.0,
+ "45": 2153.0,
+ "46": 35027.0,
+ "47": 35293.0,
+ "48": 35249.0,
+ "49": 35127.0,
+ "50": 35248.0
+ }
+ },
+ "mem-allocated-bytes": {
+ "start_step": 1,
+ "end_step": 50,
+ "step_interval": 1,
+ "values": {
+ "1": 555746816.0,
+ "2": 555746816.0,
+ "3": 555746816.0,
+ "4": 555746816.0,
+ "5": 555746816.0,
+ "6": 555746816.0,
+ "7": 555746816.0,
+ "8": 555746816.0,
+ "9": 555746816.0,
+ "10": 555746816.0,
+ "11": 555746816.0,
+ "12": 555746816.0,
+ "13": 555746816.0,
+ "14": 555746816.0,
+ "15": 555746816.0,
+ "16": 555746816.0,
+ "17": 555746816.0,
+ "18": 555746816.0,
+ "19": 555746816.0,
+ "20": 555746816.0,
+ "21": 555746816.0,
+ "22": 555746816.0,
+ "23": 555746816.0,
+ "24": 555746816.0,
+ "25": 555746816.0,
+ "26": 555746816.0,
+ "27": 555746816.0,
+ "28": 555746816.0,
+ "29": 555746816.0,
+ "30": 555746816.0,
+ "31": 555746816.0,
+ "32": 555746816.0,
+ "33": 555746816.0,
+ "34": 555746816.0,
+ "35": 555746816.0,
+ "36": 555746816.0,
+ "37": 555746816.0,
+ "38": 555746816.0,
+ "39": 555746816.0,
+ "40": 555746816.0,
+ "41": 555746816.0,
+ "42": 555746816.0,
+ "43": 555746816.0,
+ "44": 555746816.0,
+ "45": 555746816.0,
+ "46": 555746816.0,
+ "47": 555746816.0,
+ "48": 555746816.0,
+ "49": 555746816.0,
+ "50": 555746816.0
+ }
+ },
+ "mem-max-allocated-bytes": {
+ "start_step": 1,
+ "end_step": 50,
+ "step_interval": 1,
+ "values": {
+ "1": 1728349696.0,
+ "2": 1917909504.0,
+ "3": 1917909504.0,
+ "4": 1917909504.0,
+ "5": 1917909504.0,
+ "6": 1917909504.0,
+ "7": 1917909504.0,
+ "8": 1917909504.0,
+ "9": 1917909504.0,
+ "10": 1917909504.0,
+ "11": 1917909504.0,
+ "12": 1917909504.0,
+ "13": 1917909504.0,
+ "14": 1917909504.0,
+ "15": 1917909504.0,
+ "16": 1917909504.0,
+ "17": 1917909504.0,
+ "18": 1917909504.0,
+ "19": 1917909504.0,
+ "20": 1917909504.0,
+ "21": 1917909504.0,
+ "22": 1917909504.0,
+ "23": 1917909504.0,
+ "24": 1917909504.0,
+ "25": 1917909504.0,
+ "26": 1917909504.0,
+ "27": 1917909504.0,
+ "28": 1917909504.0,
+ "29": 1917909504.0,
+ "30": 1917909504.0,
+ "31": 1917909504.0,
+ "32": 1917909504.0,
+ "33": 1917909504.0,
+ "34": 1917909504.0,
+ "35": 1917909504.0,
+ "36": 1917909504.0,
+ "37": 1917909504.0,
+ "38": 1917909504.0,
+ "39": 1917909504.0,
+ "40": 1917909504.0,
+ "41": 1917909504.0,
+ "42": 1917909504.0,
+ "43": 1917909504.0,
+ "44": 1917909504.0,
+ "45": 1917909504.0,
+ "46": 1917909504.0,
+ "47": 1917909504.0,
+ "48": 1917909504.0,
+ "49": 1917909504.0,
+ "50": 1917909504.0
+ }
+ },
+ "iteration-time": {
+ "start_step": 1,
+ "end_step": 50,
+ "step_interval": 1,
+ "values": {
+ "1": "nan",
+ "2": 30.27287,
+ "3": 0.63036,
+ "4": 0.62463,
+ "5": 0.62389,
+ "6": 0.62241,
+ "7": 0.62274,
+ "8": 0.62116,
+ "9": 0.62223,
+ "10": 0.62501,
+ "11": 0.62222,
+ "12": 0.62201,
+ "13": 0.6223,
+ "14": 0.62539,
+ "15": 0.62434,
+ "16": 0.62424,
+ "17": 0.62735,
+ "18": 0.62325,
+ "19": 0.62244,
+ "20": 0.62506,
+ "21": 0.62317,
+ "22": 0.62235,
+ "23": 0.625,
+ "24": 0.62205,
+ "25": 0.62519,
+ "26": 0.64769,
+ "27": 0.62564,
+ "28": 0.62374,
+ "29": 0.62533,
+ "30": 0.62018,
+ "31": 0.62779,
+ "32": 0.62201,
+ "33": 0.63514,
+ "34": 0.6314,
+ "35": 0.63737,
+ "36": 0.62906,
+ "37": 0.64653,
+ "38": 0.63058,
+ "39": 0.63017,
+ "40": 0.63041,
+ "41": 0.6331,
+ "42": 0.62522,
+ "43": 0.62568,
+ "44": 0.62119,
+ "45": 0.62536,
+ "46": 0.62217,
+ "47": 0.62615,
+ "48": 0.6199,
+ "49": 0.61769,
+ "50": 0.62242
+ }
+ }
+}
\ No newline at end of file
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/model_config.yaml
new file mode 100644
index 00000000000..686c8bdbb59
--- /dev/null
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mhc/model_config.yaml
@@ -0,0 +1,62 @@
+ENV_VARS:
+ CUDA_DEVICE_MAX_CONNECTIONS: 1
+ NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
+ NCCL_ALGO: Ring
+ CUBLAS_WORKSPACE_CONFIG: :4096:8
+MODEL_ARGS:
+ --num-layers: 12
+ --hidden-size: 512
+ --num-attention-heads: 8
+ --log-params-norm: true
+ --log-num-zeros-in-grad: true
+ --log-validation-ppl-to-tensorboard: true
+ --log-timers-to-tensorboard: true
+ --tensorboard-dir: ${TENSORBOARD_PATH}
+ --micro-batch-size: 4
+ --global-batch-size: 32
+ --seq-length: 1024
+ --max-position-embeddings: 1024
+ --train-iters: 50
+ --timing-log-level: 0
+ --lr-decay-iters: 320000
+ --save: ${CHECKPOINT_SAVE_PATH}
+ --load: ${CHECKPOINT_LOAD_PATH}
+ --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
+ --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
+ --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
+ --split: 949,50,1
+ --distributed-backend: nccl
+ --lr: 0.00015
+ --lr-decay-style: cosine
+ --min-lr: 1.0e-5
+ --weight-decay: 1e-2
+ --clip-grad: 1.0
+ --lr-warmup-fraction: .01
+ --log-interval: 1
+ --save-interval: 25
+ --eval-interval: 50
+ --eval-iters: 50
+ --transformer-impl: transformer_engine
+ --tensor-model-parallel-size: 2
+ --pipeline-model-parallel-size: 2
+ --deterministic-mode: true
+ --no-gradient-accumulation-fusion: true
+ --attention-softmax-in-fp32: true
+ --use-mcore-models: true
+ --ckpt-format: torch_dist
+ --dist-ckpt-optim-fully-reshardable: true
+ --dist-ckpt-strictness: log_all # backward compatibility for TE changes
+ --data-cache-path: ${DATA_CACHE_PATH}
+ --bf16: true
+ --attention-backend: unfused
+ --sequence-parallel: true
+ --log-memory-to-tensorboard: true
+ --enable-hyper-connections: true
+ --num-residual-streams: 4
+ --mhc-sinkhorn-iterations: 20
+ --mhc-init-gating-factor: 0.01
+ --recompute-granularity: selective
+ --recompute-modules: "[mhc]"
+ --mhc-recompute-layer-num: 2
+ --exit-interval: 50
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/golden_values_dev_dgx_h100.json
new file mode 100644
index 00000000000..f56b5fa6f77
--- /dev/null
+++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/golden_values_dev_dgx_h100.json
@@ -0,0 +1,10037 @@
+{
+ "lm loss": {
+ "start_step": 1,
+ "end_step": 2000,
+ "step_interval": 1,
+ "values": {
+ "1": 10.85954,
+ "2": 10.88017,
+ "3": 10.87732,
+ "4": 10.8999,
+ "5": 10.88699,
+ "6": 10.87335,
+ "7": 10.88219,
+ "8": 10.87225,
+ "9": 10.87277,
+ "10": 10.87494,
+ "11": 10.85221,
+ "12": 10.84405,
+ "13": 10.84222,
+ "14": 10.86461,
+ "15": 10.78656,
+ "16": 10.81059,
+ "17": 10.77436,
+ "18": 10.81246,
+ "19": 10.72203,
+ "20": 10.69596,
+ "21": 10.64272,
+ "22": 10.64956,
+ "23": 10.65288,
+ "24": 10.54233,
+ "25": 10.55491,
+ "26": 10.63818,
+ "27": 10.44117,
+ "28": 10.46928,
+ "29": 10.34986,
+ "30": 10.24645,
+ "31": 10.42625,
+ "32": 10.33791,
+ "33": 10.19559,
+ "34": 10.14074,
+ "35": 10.22182,
+ "36": 10.13202,
+ "37": 10.07533,
+ "38": 10.01538,
+ "39": 10.02986,
+ "40": 10.05768,
+ "41": 9.93219,
+ "42": 9.93962,
+ "43": 9.8498,
+ "44": 9.97902,
+ "45": 9.99946,
+ "46": 9.83276,
+ "47": 9.99696,
+ "48": 9.80958,
+ "49": 9.94884,
+ "50": 9.94537,
+ "51": 9.58197,
+ "52": 9.79331,
+ "53": 9.62548,
+ "54": 9.88686,
+ "55": 9.73482,
+ "56": 9.84492,
+ "57": 9.85708,
+ "58": 9.87627,
+ "59": 9.54205,
+ "60": 9.64489,
+ "61": 9.88334,
+ "62": 9.75928,
+ "63": 9.68107,
+ "64": 9.82461,
+ "65": 9.59476,
+ "66": 9.62868,
+ "67": 9.74002,
+ "68": 9.60205,
+ "69": 9.29216,
+ "70": 9.42139,
+ "71": 9.78753,
+ "72": 9.7124,
+ "73": 9.61815,
+ "74": 9.44773,
+ "75": 9.23898,
+ "76": 9.50824,
+ "77": 9.5795,
+ "78": 9.56058,
+ "79": 9.30801,
+ "80": 9.35768,
+ "81": 9.45813,
+ "82": 9.55358,
+ "83": 9.53407,
+ "84": 9.35442,
+ "85": 9.3992,
+ "86": 9.65282,
+ "87": 9.23449,
+ "88": 9.48753,
+ "89": 9.22214,
+ "90": 9.41067,
+ "91": 9.38753,
+ "92": 9.37682,
+ "93": 9.36024,
+ "94": 9.51507,
+ "95": 9.42125,
+ "96": 9.33616,
+ "97": 9.20399,
+ "98": 9.4954,
+ "99": 9.29284,
+ "100": 9.35905,
+ "101": 9.24757,
+ "102": 9.24676,
+ "103": 9.07735,
+ "104": 9.16669,
+ "105": 9.37858,
+ "106": 9.1496,
+ "107": 9.1756,
+ "108": 9.316,
+ "109": 9.29109,
+ "110": 9.36426,
+ "111": 9.17995,
+ "112": 9.23471,
+ "113": 9.35297,
+ "114": 9.35265,
+ "115": 9.32672,
+ "116": 9.00223,
+ "117": 9.06476,
+ "118": 9.06643,
+ "119": 9.22418,
+ "120": 9.08485,
+ "121": 9.19671,
+ "122": 9.14164,
+ "123": 9.25933,
+ "124": 9.45506,
+ "125": 9.21512,
+ "126": 9.06416,
+ "127": 9.01814,
+ "128": 9.22131,
+ "129": 8.98184,
+ "130": 9.13972,
+ "131": 9.15856,
+ "132": 9.03559,
+ "133": 8.85977,
+ "134": 9.18539,
+ "135": 8.88999,
+ "136": 9.16801,
+ "137": 9.15771,
+ "138": 9.23511,
+ "139": 9.09197,
+ "140": 8.87218,
+ "141": 9.29906,
+ "142": 9.19961,
+ "143": 9.1169,
+ "144": 9.24305,
+ "145": 9.10446,
+ "146": 8.98709,
+ "147": 8.98617,
+ "148": 9.13261,
+ "149": 9.06335,
+ "150": 9.01504,
+ "151": 8.92787,
+ "152": 8.8739,
+ "153": 9.06335,
+ "154": 9.17913,
+ "155": 9.13381,
+ "156": 9.04889,
+ "157": 9.15064,
+ "158": 9.04955,
+ "159": 9.03261,
+ "160": 8.88987,
+ "161": 9.04543,
+ "162": 8.89584,
+ "163": 8.84272,
+ "164": 8.97534,
+ "165": 8.93132,
+ "166": 8.65959,
+ "167": 8.83243,
+ "168": 8.81953,
+ "169": 8.6566,
+ "170": 9.04622,
+ "171": 8.72286,
+ "172": 8.82159,
+ "173": 8.91163,
+ "174": 8.84751,
+ "175": 8.70611,
+ "176": 8.75439,
+ "177": 8.7626,
+ "178": 8.7201,
+ "179": 8.64046,
+ "180": 8.74053,
+ "181": 8.69404,
+ "182": 8.72193,
+ "183": 9.08364,
+ "184": 8.6088,
+ "185": 8.88346,
+ "186": 8.74191,
+ "187": 8.56949,
+ "188": 8.67975,
+ "189": 8.86478,
+ "190": 8.53542,
+ "191": 8.66632,
+ "192": 8.61266,
+ "193": 8.57469,
+ "194": 8.75195,
+ "195": 8.59279,
+ "196": 8.77393,
+ "197": 8.74234,
+ "198": 8.62722,
+ "199": 8.77454,
+ "200": 8.73803,
+ "201": 8.66979,
+ "202": 8.54593,
+ "203": 8.54185,
+ "204": 8.71307,
+ "205": 8.2228,
+ "206": 8.8603,
+ "207": 8.68157,
+ "208": 8.70896,
+ "209": 8.75303,
+ "210": 8.57807,
+ "211": 8.84258,
+ "212": 8.49127,
+ "213": 8.57327,
+ "214": 8.51199,
+ "215": 8.5645,
+ "216": 8.50863,
+ "217": 8.53183,
+ "218": 8.52998,
+ "219": 8.64367,
+ "220": 8.54746,
+ "221": 8.39991,
+ "222": 8.50528,
+ "223": 8.43775,
+ "224": 8.53014,
+ "225": 8.57091,
+ "226": 8.4394,
+ "227": 8.67918,
+ "228": 8.38473,
+ "229": 8.45045,
+ "230": 8.49717,
+ "231": 8.49832,
+ "232": 8.49783,
+ "233": 8.49539,
+ "234": 8.63795,
+ "235": 8.55875,
+ "236": 8.39461,
+ "237": 8.48826,
+ "238": 8.30522,
+ "239": 8.562,
+ "240": 8.66952,
+ "241": 8.44144,
+ "242": 8.47219,
+ "243": 8.51768,
+ "244": 8.36825,
+ "245": 8.59274,
+ "246": 8.59497,
+ "247": 8.44008,
+ "248": 8.51279,
+ "249": 8.52035,
+ "250": 8.42183,
+ "251": 8.37751,
+ "252": 8.54393,
+ "253": 8.31454,
+ "254": 8.351,
+ "255": 8.29005,
+ "256": 8.20261,
+ "257": 8.394,
+ "258": 8.45386,
+ "259": 8.23708,
+ "260": 8.2437,
+ "261": 8.23617,
+ "262": 8.34919,
+ "263": 8.30683,
+ "264": 8.18831,
+ "265": 8.33481,
+ "266": 8.23369,
+ "267": 7.89923,
+ "268": 8.38063,
+ "269": 8.40466,
+ "270": 8.26271,
+ "271": 8.279,
+ "272": 8.32109,
+ "273": 8.13747,
+ "274": 8.09677,
+ "275": 8.01372,
+ "276": 7.92611,
+ "277": 8.24041,
+ "278": 8.05017,
+ "279": 7.96688,
+ "280": 7.75652,
+ "281": 8.10713,
+ "282": 8.15049,
+ "283": 8.15621,
+ "284": 8.10354,
+ "285": 8.07234,
+ "286": 7.90454,
+ "287": 7.9963,
+ "288": 8.24862,
+ "289": 8.17575,
+ "290": 8.13093,
+ "291": 8.25763,
+ "292": 8.08131,
+ "293": 8.12059,
+ "294": 7.98178,
+ "295": 7.97108,
+ "296": 8.24114,
+ "297": 7.79647,
+ "298": 8.04847,
+ "299": 7.94257,
+ "300": 7.85748,
+ "301": 8.01649,
+ "302": 7.95112,
+ "303": 7.99606,
+ "304": 7.96394,
+ "305": 8.00301,
+ "306": 7.98312,
+ "307": 7.99372,
+ "308": 8.00491,
+ "309": 8.01362,
+ "310": 7.97824,
+ "311": 7.9323,
+ "312": 7.89419,
+ "313": 7.84054,
+ "314": 7.83,
+ "315": 7.8335,
+ "316": 7.75122,
+ "317": 7.934,
+ "318": 7.98841,
+ "319": 7.83343,
+ "320": 7.57896,
+ "321": 7.75427,
+ "322": 7.83781,
+ "323": 7.7769,
+ "324": 7.91623,
+ "325": 7.80539,
+ "326": 7.65641,
+ "327": 7.86989,
+ "328": 7.79369,
+ "329": 7.89137,
+ "330": 7.7586,
+ "331": 7.52885,
+ "332": 7.81946,
+ "333": 7.84359,
+ "334": 7.68375,
+ "335": 7.69975,
+ "336": 7.91931,
+ "337": 7.65356,
+ "338": 7.90277,
+ "339": 7.7307,
+ "340": 7.7606,
+ "341": 7.70898,
+ "342": 7.82827,
+ "343": 7.61824,
+ "344": 7.58818,
+ "345": 7.61602,
+ "346": 7.46415,
+ "347": 7.5612,
+ "348": 7.68737,
+ "349": 7.58361,
+ "350": 7.65762,
+ "351": 7.75424,
+ "352": 7.711,
+ "353": 7.50477,
+ "354": 7.74925,
+ "355": 7.77011,
+ "356": 7.78305,
+ "357": 7.81855,
+ "358": 7.60031,
+ "359": 7.55187,
+ "360": 7.63213,
+ "361": 7.55298,
+ "362": 7.76875,
+ "363": 7.59465,
+ "364": 7.57928,
+ "365": 7.62839,
+ "366": 7.31096,
+ "367": 7.55919,
+ "368": 7.44577,
+ "369": 7.3551,
+ "370": 7.46985,
+ "371": 7.46609,
+ "372": 7.65475,
+ "373": 7.52989,
+ "374": 7.44843,
+ "375": 7.53627,
+ "376": 7.35288,
+ "377": 7.24313,
+ "378": 7.54312,
+ "379": 7.4994,
+ "380": 7.38859,
+ "381": 7.47577,
+ "382": 7.29951,
+ "383": 7.28478,
+ "384": 7.4126,
+ "385": 7.39829,
+ "386": 7.23652,
+ "387": 7.42535,
+ "388": 7.28487,
+ "389": 7.44425,
+ "390": 7.24578,
+ "391": 7.6482,
+ "392": 7.34245,
+ "393": 7.42463,
+ "394": 7.48248,
+ "395": 7.44483,
+ "396": 7.29231,
+ "397": 7.23386,
+ "398": 7.42507,
+ "399": 7.16173,
+ "400": 7.30149,
+ "401": 7.3585,
+ "402": 7.39832,
+ "403": 7.28806,
+ "404": 7.30832,
+ "405": 7.27202,
+ "406": 7.22485,
+ "407": 7.36688,
+ "408": 7.18877,
+ "409": 7.17334,
+ "410": 7.31999,
+ "411": 7.2223,
+ "412": 7.20595,
+ "413": 7.24047,
+ "414": 6.9176,
+ "415": 7.3341,
+ "416": 7.43139,
+ "417": 7.0298,
+ "418": 7.28201,
+ "419": 7.04286,
+ "420": 7.41864,
+ "421": 7.18456,
+ "422": 7.24003,
+ "423": 7.09785,
+ "424": 7.24581,
+ "425": 7.32182,
+ "426": 7.29342,
+ "427": 7.1359,
+ "428": 7.09617,
+ "429": 6.87976,
+ "430": 7.20691,
+ "431": 7.00662,
+ "432": 7.23762,
+ "433": 6.97996,
+ "434": 6.96131,
+ "435": 7.02219,
+ "436": 7.01484,
+ "437": 6.9921,
+ "438": 7.00514,
+ "439": 6.94235,
+ "440": 7.06367,
+ "441": 7.04936,
+ "442": 7.10187,
+ "443": 7.0941,
+ "444": 6.71175,
+ "445": 6.99825,
+ "446": 7.14631,
+ "447": 7.12745,
+ "448": 6.98621,
+ "449": 7.0508,
+ "450": 7.01761,
+ "451": 6.83255,
+ "452": 6.9157,
+ "453": 7.02056,
+ "454": 6.97019,
+ "455": 7.03145,
+ "456": 6.99451,
+ "457": 6.97283,
+ "458": 6.9066,
+ "459": 6.69482,
+ "460": 7.06773,
+ "461": 7.09857,
+ "462": 6.87116,
+ "463": 7.05522,
+ "464": 6.64922,
+ "465": 7.02852,
+ "466": 7.00594,
+ "467": 6.99935,
+ "468": 6.95215,
+ "469": 6.8291,
+ "470": 7.04615,
+ "471": 6.88316,
+ "472": 6.96104,
+ "473": 6.82398,
+ "474": 6.97228,
+ "475": 7.16917,
+ "476": 6.76379,
+ "477": 6.89771,
+ "478": 6.91142,
+ "479": 6.70396,
+ "480": 7.03025,
+ "481": 6.99763,
+ "482": 6.73608,
+ "483": 6.78502,
+ "484": 6.75413,
+ "485": 6.93205,
+ "486": 7.06796,
+ "487": 6.63653,
+ "488": 6.88737,
+ "489": 6.77108,
+ "490": 6.82685,
+ "491": 6.71122,
+ "492": 6.69849,
+ "493": 6.77155,
+ "494": 6.67651,
+ "495": 6.63733,
+ "496": 6.59006,
+ "497": 6.84564,
+ "498": 6.65256,
+ "499": 6.85952,
+ "500": 6.65795,
+ "501": 6.73562,
+ "502": 6.84527,
+ "503": 6.71173,
+ "504": 6.62075,
+ "505": 6.62291,
+ "506": 6.75234,
+ "507": 6.86844,
+ "508": 6.86157,
+ "509": 6.6555,
+ "510": 6.82834,
+ "511": 6.74132,
+ "512": 6.74051,
+ "513": 6.66032,
+ "514": 6.71273,
+ "515": 6.45045,
+ "516": 6.74436,
+ "517": 6.71073,
+ "518": 6.53817,
+ "519": 6.63527,
+ "520": 6.85868,
+ "521": 6.66571,
+ "522": 6.70871,
+ "523": 6.74553,
+ "524": 6.73396,
+ "525": 6.6762,
+ "526": 6.4139,
+ "527": 6.79901,
+ "528": 6.66011,
+ "529": 6.63182,
+ "530": 6.62611,
+ "531": 6.64289,
+ "532": 6.63292,
+ "533": 6.76391,
+ "534": 6.61301,
+ "535": 6.74754,
+ "536": 6.62605,
+ "537": 6.63867,
+ "538": 6.53166,
+ "539": 6.5542,
+ "540": 6.5862,
+ "541": 6.45207,
+ "542": 6.66957,
+ "543": 6.68064,
+ "544": 6.67601,
+ "545": 6.81307,
+ "546": 6.63333,
+ "547": 6.41838,
+ "548": 6.72367,
+ "549": 6.69982,
+ "550": 6.52974,
+ "551": 6.7478,
+ "552": 6.63991,
+ "553": 6.48451,
+ "554": 6.63407,
+ "555": 6.4629,
+ "556": 6.61792,
+ "557": 6.63496,
+ "558": 6.3874,
+ "559": 6.37379,
+ "560": 6.58293,
+ "561": 6.73352,
+ "562": 6.6356,
+ "563": 6.7444,
+ "564": 6.35291,
+ "565": 6.51482,
+ "566": 6.70247,
+ "567": 6.56973,
+ "568": 6.51145,
+ "569": 6.45578,
+ "570": 6.36768,
+ "571": 6.63597,
+ "572": 6.31359,
+ "573": 6.58668,
+ "574": 6.47613,
+ "575": 6.64961,
+ "576": 6.5168,
+ "577": 6.53078,
+ "578": 6.4847,
+ "579": 6.46709,
+ "580": 6.56793,
+ "581": 6.60857,
+ "582": 6.48362,
+ "583": 6.51541,
+ "584": 6.52831,
+ "585": 6.42713,
+ "586": 6.4178,
+ "587": 6.46113,
+ "588": 6.56878,
+ "589": 6.62653,
+ "590": 6.29114,
+ "591": 6.67541,
+ "592": 6.26902,
+ "593": 6.4773,
+ "594": 6.38719,
+ "595": 6.3632,
+ "596": 6.26099,
+ "597": 6.18986,
+ "598": 6.45726,
+ "599": 6.3998,
+ "600": 6.45709,
+ "601": 6.26132,
+ "602": 6.5338,
+ "603": 6.52288,
+ "604": 6.38993,
+ "605": 6.49993,
+ "606": 6.31475,
+ "607": 6.53507,
+ "608": 6.67525,
+ "609": 6.17714,
+ "610": 6.57295,
+ "611": 6.40188,
+ "612": 6.57929,
+ "613": 6.42667,
+ "614": 6.20672,
+ "615": 6.40081,
+ "616": 6.36019,
+ "617": 6.37969,
+ "618": 6.4512,
+ "619": 6.14244,
+ "620": 6.41233,
+ "621": 6.46338,
+ "622": 6.40096,
+ "623": 6.58352,
+ "624": 6.36078,
+ "625": 6.28553,
+ "626": 6.30525,
+ "627": 6.44574,
+ "628": 6.2557,
+ "629": 6.58813,
+ "630": 6.36641,
+ "631": 6.3498,
+ "632": 6.30972,
+ "633": 6.25733,
+ "634": 6.30887,
+ "635": 6.54592,
+ "636": 6.24834,
+ "637": 6.63634,
+ "638": 6.02046,
+ "639": 6.2798,
+ "640": 6.29548,
+ "641": 6.20953,
+ "642": 6.28471,
+ "643": 6.461,
+ "644": 6.25863,
+ "645": 6.25115,
+ "646": 6.40601,
+ "647": 6.33707,
+ "648": 6.35671,
+ "649": 6.3488,
+ "650": 6.48415,
+ "651": 6.33395,
+ "652": 6.25233,
+ "653": 6.3826,
+ "654": 6.45063,
+ "655": 6.52494,
+ "656": 6.32781,
+ "657": 6.43503,
+ "658": 6.24353,
+ "659": 6.1554,
+ "660": 6.39397,
+ "661": 6.17184,
+ "662": 6.27494,
+ "663": 6.37237,
+ "664": 6.33376,
+ "665": 6.40442,
+ "666": 6.16399,
+ "667": 6.1965,
+ "668": 6.2366,
+ "669": 6.21813,
+ "670": 6.24601,
+ "671": 6.24468,
+ "672": 6.49032,
+ "673": 6.34071,
+ "674": 6.2969,
+ "675": 6.38396,
+ "676": 6.39021,
+ "677": 6.30588,
+ "678": 6.27751,
+ "679": 6.23892,
+ "680": 6.2942,
+ "681": 6.20621,
+ "682": 6.08719,
+ "683": 6.27464,
+ "684": 6.32896,
+ "685": 6.30248,
+ "686": 6.15397,
+ "687": 6.2862,
+ "688": 6.20754,
+ "689": 6.6215,
+ "690": 6.17931,
+ "691": 6.18188,
+ "692": 6.2745,
+ "693": 6.14405,
+ "694": 6.23487,
+ "695": 6.32617,
+ "696": 6.11842,
+ "697": 6.15483,
+ "698": 6.23128,
+ "699": 6.46051,
+ "700": 6.0454,
+ "701": 6.06467,
+ "702": 6.25219,
+ "703": 6.18603,
+ "704": 6.21704,
+ "705": 6.13155,
+ "706": 6.07593,
+ "707": 6.25376,
+ "708": 6.31553,
+ "709": 6.01087,
+ "710": 6.16305,
+ "711": 6.26062,
+ "712": 6.18307,
+ "713": 5.89806,
+ "714": 6.10759,
+ "715": 6.11617,
+ "716": 6.41405,
+ "717": 6.19202,
+ "718": 6.2345,
+ "719": 6.27471,
+ "720": 6.26372,
+ "721": 6.26277,
+ "722": 6.23442,
+ "723": 6.0814,
+ "724": 6.22797,
+ "725": 6.04057,
+ "726": 6.30046,
+ "727": 6.01682,
+ "728": 6.04617,
+ "729": 6.09111,
+ "730": 6.18359,
+ "731": 6.10398,
+ "732": 6.08898,
+ "733": 6.12312,
+ "734": 6.38423,
+ "735": 6.27849,
+ "736": 6.18184,
+ "737": 6.36645,
+ "738": 6.13411,
+ "739": 6.14591,
+ "740": 5.87975,
+ "741": 6.00667,
+ "742": 5.98459,
+ "743": 6.17495,
+ "744": 6.02962,
+ "745": 6.15497,
+ "746": 6.03272,
+ "747": 6.09789,
+ "748": 6.23436,
+ "749": 5.94191,
+ "750": 6.16819,
+ "751": 5.9596,
+ "752": 6.01941,
+ "753": 6.02989,
+ "754": 6.28798,
+ "755": 6.13521,
+ "756": 6.25357,
+ "757": 6.02098,
+ "758": 6.20422,
+ "759": 6.23062,
+ "760": 6.02316,
+ "761": 6.19655,
+ "762": 6.22713,
+ "763": 6.03754,
+ "764": 5.9636,
+ "765": 5.93413,
+ "766": 5.97155,
+ "767": 5.81277,
+ "768": 6.18725,
+ "769": 6.27646,
+ "770": 6.29561,
+ "771": 5.78767,
+ "772": 6.03281,
+ "773": 6.18558,
+ "774": 5.88583,
+ "775": 6.03167,
+ "776": 6.13086,
+ "777": 5.88612,
+ "778": 6.05891,
+ "779": 5.87414,
+ "780": 6.14047,
+ "781": 5.85641,
+ "782": 6.04961,
+ "783": 5.95687,
+ "784": 5.91852,
+ "785": 6.09816,
+ "786": 6.10929,
+ "787": 5.66006,
+ "788": 5.99915,
+ "789": 6.21789,
+ "790": 6.26737,
+ "791": 5.79122,
+ "792": 5.99828,
+ "793": 6.18387,
+ "794": 6.02746,
+ "795": 6.0051,
+ "796": 6.17065,
+ "797": 6.05376,
+ "798": 6.06076,
+ "799": 6.11682,
+ "800": 6.02167,
+ "801": 6.15011,
+ "802": 5.98473,
+ "803": 6.15363,
+ "804": 6.00859,
+ "805": 5.83055,
+ "806": 6.08757,
+ "807": 6.04997,
+ "808": 5.92717,
+ "809": 5.77802,
+ "810": 6.01973,
+ "811": 5.93299,
+ "812": 5.91169,
+ "813": 5.96567,
+ "814": 6.0369,
+ "815": 5.8146,
+ "816": 6.12034,
+ "817": 5.94337,
+ "818": 6.0674,
+ "819": 6.01476,
+ "820": 5.7319,
+ "821": 5.95027,
+ "822": 6.20452,
+ "823": 5.83139,
+ "824": 5.98275,
+ "825": 6.18795,
+ "826": 6.20019,
+ "827": 6.05802,
+ "828": 6.06976,
+ "829": 5.89149,
+ "830": 5.94221,
+ "831": 5.89773,
+ "832": 5.97341,
+ "833": 6.06501,
+ "834": 5.99675,
+ "835": 6.00654,
+ "836": 5.79277,
+ "837": 6.11496,
+ "838": 5.86966,
+ "839": 5.83554,
+ "840": 6.18614,
+ "841": 5.78491,
+ "842": 5.89169,
+ "843": 5.95102,
+ "844": 6.00954,
+ "845": 6.09153,
+ "846": 5.68733,
+ "847": 5.75715,
+ "848": 5.96838,
+ "849": 6.09512,
+ "850": 5.84886,
+ "851": 6.01693,
+ "852": 5.75188,
+ "853": 5.99355,
+ "854": 6.01844,
+ "855": 5.81656,
+ "856": 5.99593,
+ "857": 6.00207,
+ "858": 6.05507,
+ "859": 5.95295,
+ "860": 6.09632,
+ "861": 6.07189,
+ "862": 6.00434,
+ "863": 5.83757,
+ "864": 5.84474,
+ "865": 5.93791,
+ "866": 5.89404,
+ "867": 5.87803,
+ "868": 6.06515,
+ "869": 6.08564,
+ "870": 5.97153,
+ "871": 6.04317,
+ "872": 5.89525,
+ "873": 5.84383,
+ "874": 6.02742,
+ "875": 5.9144,
+ "876": 5.96905,
+ "877": 5.92979,
+ "878": 6.09819,
+ "879": 5.76783,
+ "880": 6.01501,
+ "881": 5.99647,
+ "882": 5.9097,
+ "883": 5.67626,
+ "884": 5.96521,
+ "885": 5.74544,
+ "886": 5.99268,
+ "887": 5.90979,
+ "888": 5.83897,
+ "889": 6.01033,
+ "890": 6.02378,
+ "891": 5.95247,
+ "892": 5.70829,
+ "893": 6.0922,
+ "894": 5.73134,
+ "895": 5.84057,
+ "896": 5.84075,
+ "897": 5.8564,
+ "898": 5.9238,
+ "899": 5.93486,
+ "900": 5.89946,
+ "901": 5.95293,
+ "902": 5.83295,
+ "903": 6.05665,
+ "904": 5.93153,
+ "905": 5.90441,
+ "906": 5.6172,
+ "907": 5.91178,
+ "908": 5.73853,
+ "909": 5.99118,
+ "910": 5.86603,
+ "911": 5.70397,
+ "912": 5.70712,
+ "913": 5.76497,
+ "914": 5.83944,
+ "915": 5.80032,
+ "916": 5.8904,
+ "917": 5.86913,
+ "918": 5.82415,
+ "919": 5.81575,
+ "920": 5.89552,
+ "921": 5.84163,
+ "922": 5.62427,
+ "923": 6.03657,
+ "924": 5.60536,
+ "925": 5.62335,
+ "926": 5.86148,
+ "927": 5.96071,
+ "928": 5.84005,
+ "929": 5.82702,
+ "930": 5.95816,
+ "931": 5.765,
+ "932": 5.59211,
+ "933": 5.6351,
+ "934": 5.80541,
+ "935": 5.63715,
+ "936": 5.83772,
+ "937": 5.96629,
+ "938": 5.59109,
+ "939": 5.7899,
+ "940": 5.96726,
+ "941": 5.7264,
+ "942": 5.83547,
+ "943": 5.86622,
+ "944": 5.95478,
+ "945": 5.70263,
+ "946": 5.55832,
+ "947": 5.74831,
+ "948": 5.79312,
+ "949": 5.8268,
+ "950": 5.84353,
+ "951": 5.72242,
+ "952": 5.69295,
+ "953": 5.67852,
+ "954": 5.72473,
+ "955": 5.53107,
+ "956": 5.62074,
+ "957": 5.84076,
+ "958": 5.79676,
+ "959": 5.57317,
+ "960": 5.80125,
+ "961": 5.82952,
+ "962": 5.76695,
+ "963": 5.76461,
+ "964": 5.70677,
+ "965": 5.64012,
+ "966": 5.59617,
+ "967": 5.72434,
+ "968": 5.74036,
+ "969": 5.82392,
+ "970": 5.64422,
+ "971": 5.7065,
+ "972": 5.85308,
+ "973": 5.66884,
+ "974": 5.71841,
+ "975": 5.86273,
+ "976": 5.70493,
+ "977": 5.77104,
+ "978": 5.6858,
+ "979": 5.58655,
+ "980": 5.75924,
+ "981": 5.8969,
+ "982": 5.47038,
+ "983": 5.61817,
+ "984": 5.54504,
+ "985": 5.59032,
+ "986": 5.64132,
+ "987": 5.56966,
+ "988": 5.70939,
+ "989": 5.69379,
+ "990": 5.62195,
+ "991": 5.84899,
+ "992": 5.77877,
+ "993": 5.87022,
+ "994": 5.69735,
+ "995": 5.73242,
+ "996": 5.73704,
+ "997": 5.81329,
+ "998": 5.83634,
+ "999": 5.83399,
+ "1000": 5.68342,
+ "1001": 5.86668,
+ "1002": 5.76052,
+ "1003": 5.64259,
+ "1004": 5.79811,
+ "1005": 5.53617,
+ "1006": 5.326,
+ "1007": 5.76701,
+ "1008": 5.79136,
+ "1009": 5.65046,
+ "1010": 5.77942,
+ "1011": 5.89493,
+ "1012": 5.62303,
+ "1013": 5.61569,
+ "1014": 5.68111,
+ "1015": 5.55747,
+ "1016": 5.87327,
+ "1017": 5.83312,
+ "1018": 5.61865,
+ "1019": 5.73414,
+ "1020": 5.61755,
+ "1021": 5.848,
+ "1022": 5.50045,
+ "1023": 5.65182,
+ "1024": 5.74493,
+ "1025": 5.5692,
+ "1026": 5.41415,
+ "1027": 5.60696,
+ "1028": 5.6928,
+ "1029": 5.68764,
+ "1030": 5.68746,
+ "1031": 5.40696,
+ "1032": 5.78748,
+ "1033": 5.58136,
+ "1034": 5.61937,
+ "1035": 5.71368,
+ "1036": 5.62818,
+ "1037": 5.3679,
+ "1038": 5.66452,
+ "1039": 5.64347,
+ "1040": 5.57004,
+ "1041": 5.59722,
+ "1042": 5.81329,
+ "1043": 5.566,
+ "1044": 5.46906,
+ "1045": 5.9659,
+ "1046": 5.4866,
+ "1047": 5.38954,
+ "1048": 5.50027,
+ "1049": 5.67182,
+ "1050": 5.6991,
+ "1051": 5.57928,
+ "1052": 5.68227,
+ "1053": 5.62737,
+ "1054": 5.45766,
+ "1055": 5.60313,
+ "1056": 5.67386,
+ "1057": 5.75895,
+ "1058": 5.56782,
+ "1059": 5.74888,
+ "1060": 5.82022,
+ "1061": 5.47624,
+ "1062": 5.64897,
+ "1063": 5.50121,
+ "1064": 5.59136,
+ "1065": 5.55347,
+ "1066": 5.74367,
+ "1067": 5.67235,
+ "1068": 5.44068,
+ "1069": 5.60636,
+ "1070": 5.81264,
+ "1071": 5.51129,
+ "1072": 5.61871,
+ "1073": 5.62147,
+ "1074": 5.524,
+ "1075": 5.70529,
+ "1076": 5.5934,
+ "1077": 5.71153,
+ "1078": 5.56524,
+ "1079": 5.61728,
+ "1080": 5.64251,
+ "1081": 5.62319,
+ "1082": 5.49648,
+ "1083": 5.64086,
+ "1084": 5.55389,
+ "1085": 5.40631,
+ "1086": 5.62008,
+ "1087": 5.44148,
+ "1088": 5.51218,
+ "1089": 5.7676,
+ "1090": 5.53165,
+ "1091": 5.51388,
+ "1092": 5.41011,
+ "1093": 5.70025,
+ "1094": 5.57364,
+ "1095": 5.57735,
+ "1096": 5.61585,
+ "1097": 5.64586,
+ "1098": 5.64877,
+ "1099": 5.51631,
+ "1100": 5.63778,
+ "1101": 5.67335,
+ "1102": 5.54037,
+ "1103": 5.54969,
+ "1104": 5.53882,
+ "1105": 5.54754,
+ "1106": 5.68315,
+ "1107": 5.68556,
+ "1108": 5.78611,
+ "1109": 5.53666,
+ "1110": 5.66598,
+ "1111": 5.58973,
+ "1112": 5.58039,
+ "1113": 5.62611,
+ "1114": 5.61279,
+ "1115": 5.59718,
+ "1116": 5.65925,
+ "1117": 5.64676,
+ "1118": 5.65036,
+ "1119": 5.70919,
+ "1120": 5.62738,
+ "1121": 5.37352,
+ "1122": 5.22976,
+ "1123": 5.47237,
+ "1124": 5.64939,
+ "1125": 5.67974,
+ "1126": 5.679,
+ "1127": 5.56811,
+ "1128": 5.61992,
+ "1129": 5.29637,
+ "1130": 5.54359,
+ "1131": 5.63153,
+ "1132": 5.72427,
+ "1133": 5.51914,
+ "1134": 5.56063,
+ "1135": 5.52056,
+ "1136": 5.42646,
+ "1137": 5.45971,
+ "1138": 5.56927,
+ "1139": 5.41452,
+ "1140": 5.2656,
+ "1141": 5.58265,
+ "1142": 5.64152,
+ "1143": 5.38298,
+ "1144": 5.38584,
+ "1145": 5.36231,
+ "1146": 5.63508,
+ "1147": 5.49183,
+ "1148": 5.50524,
+ "1149": 5.52352,
+ "1150": 5.39801,
+ "1151": 5.5563,
+ "1152": 5.41525,
+ "1153": 5.44791,
+ "1154": 5.49757,
+ "1155": 5.43833,
+ "1156": 5.3488,
+ "1157": 5.66444,
+ "1158": 5.39487,
+ "1159": 5.33455,
+ "1160": 5.79503,
+ "1161": 5.53955,
+ "1162": 5.45818,
+ "1163": 5.52563,
+ "1164": 5.3837,
+ "1165": 5.52861,
+ "1166": 5.48753,
+ "1167": 5.36312,
+ "1168": 5.49491,
+ "1169": 5.39842,
+ "1170": 5.59202,
+ "1171": 5.48502,
+ "1172": 5.64238,
+ "1173": 5.62295,
+ "1174": 5.50843,
+ "1175": 5.34639,
+ "1176": 5.38504,
+ "1177": 5.55461,
+ "1178": 5.46852,
+ "1179": 5.49505,
+ "1180": 5.46014,
+ "1181": 5.56031,
+ "1182": 5.59593,
+ "1183": 5.77155,
+ "1184": 5.54926,
+ "1185": 5.29008,
+ "1186": 5.60451,
+ "1187": 5.55363,
+ "1188": 5.51655,
+ "1189": 5.39133,
+ "1190": 5.40482,
+ "1191": 5.39266,
+ "1192": 5.50142,
+ "1193": 5.46347,
+ "1194": 5.45607,
+ "1195": 5.32751,
+ "1196": 5.52219,
+ "1197": 5.4809,
+ "1198": 5.52789,
+ "1199": 5.3874,
+ "1200": 5.33059,
+ "1201": 5.48969,
+ "1202": 5.43584,
+ "1203": 5.49537,
+ "1204": 5.40861,
+ "1205": 5.48971,
+ "1206": 5.3371,
+ "1207": 5.58625,
+ "1208": 5.4312,
+ "1209": 5.29323,
+ "1210": 5.50765,
+ "1211": 5.51506,
+ "1212": 5.59777,
+ "1213": 5.42123,
+ "1214": 5.51018,
+ "1215": 5.23832,
+ "1216": 5.40989,
+ "1217": 5.38537,
+ "1218": 5.45232,
+ "1219": 5.48221,
+ "1220": 5.38594,
+ "1221": 5.44848,
+ "1222": 5.31032,
+ "1223": 5.47835,
+ "1224": 5.42017,
+ "1225": 5.43499,
+ "1226": 5.3238,
+ "1227": 5.47632,
+ "1228": 5.72418,
+ "1229": 5.32629,
+ "1230": 5.40556,
+ "1231": 5.06972,
+ "1232": 5.78794,
+ "1233": 5.28923,
+ "1234": 5.24535,
+ "1235": 5.37092,
+ "1236": 5.48471,
+ "1237": 5.20864,
+ "1238": 5.41643,
+ "1239": 5.40751,
+ "1240": 5.46767,
+ "1241": 5.57266,
+ "1242": 5.4536,
+ "1243": 5.43063,
+ "1244": 5.51812,
+ "1245": 5.19115,
+ "1246": 5.72042,
+ "1247": 5.43187,
+ "1248": 5.30004,
+ "1249": 5.40113,
+ "1250": 5.33798,
+ "1251": 5.42034,
+ "1252": 5.57217,
+ "1253": 5.48773,
+ "1254": 5.30628,
+ "1255": 5.51443,
+ "1256": 5.60755,
+ "1257": 5.4214,
+ "1258": 5.56457,
+ "1259": 5.48027,
+ "1260": 5.51461,
+ "1261": 5.63883,
+ "1262": 5.39531,
+ "1263": 5.32916,
+ "1264": 5.50671,
+ "1265": 5.30632,
+ "1266": 5.23819,
+ "1267": 5.37206,
+ "1268": 5.39267,
+ "1269": 5.15366,
+ "1270": 5.40418,
+ "1271": 5.27732,
+ "1272": 5.5252,
+ "1273": 5.30228,
+ "1274": 5.3516,
+ "1275": 5.38466,
+ "1276": 5.39786,
+ "1277": 5.46218,
+ "1278": 5.34689,
+ "1279": 5.44274,
+ "1280": 5.45919,
+ "1281": 5.40638,
+ "1282": 5.3824,
+ "1283": 5.42204,
+ "1284": 5.34841,
+ "1285": 5.50133,
+ "1286": 5.33557,
+ "1287": 5.58795,
+ "1288": 5.26493,
+ "1289": 5.429,
+ "1290": 5.50282,
+ "1291": 5.50335,
+ "1292": 5.44662,
+ "1293": 5.41955,
+ "1294": 5.49953,
+ "1295": 5.34675,
+ "1296": 5.19062,
+ "1297": 5.17238,
+ "1298": 5.11916,
+ "1299": 5.30339,
+ "1300": 5.21032,
+ "1301": 5.30157,
+ "1302": 5.27472,
+ "1303": 5.36107,
+ "1304": 5.43231,
+ "1305": 5.36999,
+ "1306": 5.25347,
+ "1307": 5.18829,
+ "1308": 5.27033,
+ "1309": 5.40736,
+ "1310": 5.26399,
+ "1311": 5.38109,
+ "1312": 5.35438,
+ "1313": 5.30056,
+ "1314": 5.2953,
+ "1315": 5.42245,
+ "1316": 5.26148,
+ "1317": 5.28065,
+ "1318": 5.2198,
+ "1319": 5.34619,
+ "1320": 5.42093,
+ "1321": 5.44976,
+ "1322": 5.46399,
+ "1323": 5.37327,
+ "1324": 5.25463,
+ "1325": 5.40657,
+ "1326": 5.54082,
+ "1327": 5.39378,
+ "1328": 5.21893,
+ "1329": 5.41851,
+ "1330": 5.40079,
+ "1331": 5.31685,
+ "1332": 5.31253,
+ "1333": 5.37243,
+ "1334": 5.44685,
+ "1335": 5.37136,
+ "1336": 5.43779,
+ "1337": 5.47852,
+ "1338": 5.30292,
+ "1339": 5.14181,
+ "1340": 5.41486,
+ "1341": 5.3443,
+ "1342": 5.36197,
+ "1343": 5.47816,
+ "1344": 5.37832,
+ "1345": 5.34294,
+ "1346": 5.08195,
+ "1347": 5.38558,
+ "1348": 5.4918,
+ "1349": 5.40832,
+ "1350": 5.02622,
+ "1351": 5.3151,
+ "1352": 5.1591,
+ "1353": 5.34674,
+ "1354": 5.35963,
+ "1355": 5.11092,
+ "1356": 5.2587,
+ "1357": 5.29209,
+ "1358": 5.15773,
+ "1359": 5.11035,
+ "1360": 5.17288,
+ "1361": 5.30521,
+ "1362": 5.06318,
+ "1363": 5.2947,
+ "1364": 5.40031,
+ "1365": 5.02241,
+ "1366": 5.11779,
+ "1367": 5.33051,
+ "1368": 5.18648,
+ "1369": 5.22984,
+ "1370": 5.19906,
+ "1371": 5.2839,
+ "1372": 5.26155,
+ "1373": 5.28402,
+ "1374": 5.28112,
+ "1375": 5.46052,
+ "1376": 5.2713,
+ "1377": 5.26467,
+ "1378": 5.31344,
+ "1379": 5.22741,
+ "1380": 5.26107,
+ "1381": 5.47871,
+ "1382": 5.08923,
+ "1383": 5.375,
+ "1384": 5.35914,
+ "1385": 5.38983,
+ "1386": 5.16417,
+ "1387": 5.16094,
+ "1388": 5.28017,
+ "1389": 5.30376,
+ "1390": 5.25514,
+ "1391": 5.26911,
+ "1392": 5.37008,
+ "1393": 5.38307,
+ "1394": 5.40394,
+ "1395": 5.32492,
+ "1396": 5.21356,
+ "1397": 5.28,
+ "1398": 5.37051,
+ "1399": 5.35873,
+ "1400": 5.26512,
+ "1401": 5.35924,
+ "1402": 5.42148,
+ "1403": 5.20238,
+ "1404": 5.28629,
+ "1405": 5.11984,
+ "1406": 4.99128,
+ "1407": 5.40442,
+ "1408": 5.19825,
+ "1409": 5.3964,
+ "1410": 5.37519,
+ "1411": 4.91758,
+ "1412": 5.35561,
+ "1413": 5.41314,
+ "1414": 5.21823,
+ "1415": 5.44159,
+ "1416": 5.32905,
+ "1417": 5.38859,
+ "1418": 5.29946,
+ "1419": 5.31787,
+ "1420": 5.43974,
+ "1421": 5.39414,
+ "1422": 5.41749,
+ "1423": 5.005,
+ "1424": 5.32995,
+ "1425": 5.58618,
+ "1426": 5.23059,
+ "1427": 5.31804,
+ "1428": 5.33277,
+ "1429": 5.07552,
+ "1430": 5.33075,
+ "1431": 5.32688,
+ "1432": 5.33826,
+ "1433": 5.19107,
+ "1434": 5.16341,
+ "1435": 5.19905,
+ "1436": 5.10851,
+ "1437": 5.229,
+ "1438": 5.31867,
+ "1439": 5.34731,
+ "1440": 5.34991,
+ "1441": 5.16484,
+ "1442": 5.22015,
+ "1443": 5.20933,
+ "1444": 5.13701,
+ "1445": 5.07414,
+ "1446": 5.26836,
+ "1447": 5.25895,
+ "1448": 5.2904,
+ "1449": 5.2498,
+ "1450": 5.34281,
+ "1451": 5.07084,
+ "1452": 5.27052,
+ "1453": 5.1668,
+ "1454": 5.01539,
+ "1455": 5.12292,
+ "1456": 5.2717,
+ "1457": 5.18713,
+ "1458": 5.00608,
+ "1459": 5.22304,
+ "1460": 5.23389,
+ "1461": 5.07142,
+ "1462": 4.96923,
+ "1463": 5.14383,
+ "1464": 5.21128,
+ "1465": 5.26911,
+ "1466": 5.34961,
+ "1467": 5.33438,
+ "1468": 5.22205,
+ "1469": 5.04373,
+ "1470": 5.11715,
+ "1471": 5.25199,
+ "1472": 5.12294,
+ "1473": 5.10395,
+ "1474": 5.21775,
+ "1475": 5.18567,
+ "1476": 5.15287,
+ "1477": 5.26203,
+ "1478": 5.30399,
+ "1479": 5.01175,
+ "1480": 5.1809,
+ "1481": 5.24516,
+ "1482": 5.34866,
+ "1483": 5.26395,
+ "1484": 4.92397,
+ "1485": 5.29179,
+ "1486": 5.04178,
+ "1487": 4.88296,
+ "1488": 5.18145,
+ "1489": 5.10246,
+ "1490": 5.04399,
+ "1491": 5.31709,
+ "1492": 5.22469,
+ "1493": 4.94051,
+ "1494": 5.10929,
+ "1495": 5.13424,
+ "1496": 5.05862,
+ "1497": 5.36633,
+ "1498": 5.30967,
+ "1499": 5.13834,
+ "1500": 5.09851,
+ "1501": 5.03466,
+ "1502": 5.15527,
+ "1503": 5.43143,
+ "1504": 5.31968,
+ "1505": 5.00114,
+ "1506": 5.14444,
+ "1507": 5.16068,
+ "1508": 5.16575,
+ "1509": 5.31451,
+ "1510": 5.0185,
+ "1511": 5.11697,
+ "1512": 4.98287,
+ "1513": 5.16993,
+ "1514": 5.33962,
+ "1515": 5.36563,
+ "1516": 5.27715,
+ "1517": 5.22687,
+ "1518": 5.02626,
+ "1519": 5.29861,
+ "1520": 5.1417,
+ "1521": 5.15866,
+ "1522": 5.32824,
+ "1523": 5.24625,
+ "1524": 5.06725,
+ "1525": 5.20424,
+ "1526": 5.27994,
+ "1527": 5.25677,
+ "1528": 5.23589,
+ "1529": 5.18688,
+ "1530": 5.24365,
+ "1531": 5.09964,
+ "1532": 5.15141,
+ "1533": 5.05087,
+ "1534": 5.21589,
+ "1535": 5.1635,
+ "1536": 5.09678,
+ "1537": 5.02713,
+ "1538": 4.91184,
+ "1539": 5.23801,
+ "1540": 5.11515,
+ "1541": 5.25246,
+ "1542": 5.23484,
+ "1543": 5.05152,
+ "1544": 5.07544,
+ "1545": 5.1161,
+ "1546": 5.33085,
+ "1547": 5.11115,
+ "1548": 5.23527,
+ "1549": 5.23735,
+ "1550": 4.97596,
+ "1551": 5.2566,
+ "1552": 5.02944,
+ "1553": 5.14849,
+ "1554": 5.11205,
+ "1555": 5.10901,
+ "1556": 5.19824,
+ "1557": 5.08883,
+ "1558": 5.23067,
+ "1559": 5.00402,
+ "1560": 5.11835,
+ "1561": 5.14529,
+ "1562": 5.17996,
+ "1563": 5.24454,
+ "1564": 5.26389,
+ "1565": 5.08902,
+ "1566": 5.29474,
+ "1567": 5.04166,
+ "1568": 5.09256,
+ "1569": 5.20014,
+ "1570": 5.17348,
+ "1571": 4.95353,
+ "1572": 5.04005,
+ "1573": 5.02897,
+ "1574": 4.99751,
+ "1575": 5.2314,
+ "1576": 5.21263,
+ "1577": 5.12799,
+ "1578": 5.36241,
+ "1579": 4.94367,
+ "1580": 5.12197,
+ "1581": 5.09638,
+ "1582": 5.28497,
+ "1583": 5.04918,
+ "1584": 5.05482,
+ "1585": 5.11977,
+ "1586": 5.30243,
+ "1587": 5.13447,
+ "1588": 5.2184,
+ "1589": 4.83833,
+ "1590": 5.09497,
+ "1591": 5.17411,
+ "1592": 5.13721,
+ "1593": 5.23457,
+ "1594": 5.11805,
+ "1595": 5.10775,
+ "1596": 5.18964,
+ "1597": 5.11486,
+ "1598": 5.15917,
+ "1599": 5.19102,
+ "1600": 4.86871,
+ "1601": 5.11732,
+ "1602": 5.23185,
+ "1603": 5.19543,
+ "1604": 5.05128,
+ "1605": 5.02692,
+ "1606": 4.98659,
+ "1607": 5.07391,
+ "1608": 4.97985,
+ "1609": 5.07337,
+ "1610": 5.04745,
+ "1611": 4.99848,
+ "1612": 4.75205,
+ "1613": 5.03316,
+ "1614": 4.88034,
+ "1615": 5.07442,
+ "1616": 5.23082,
+ "1617": 5.06132,
+ "1618": 4.98704,
+ "1619": 5.18333,
+ "1620": 5.14491,
+ "1621": 5.31452,
+ "1622": 5.05677,
+ "1623": 5.14346,
+ "1624": 5.1355,
+ "1625": 5.12006,
+ "1626": 5.10245,
+ "1627": 5.10987,
+ "1628": 5.06581,
+ "1629": 4.92971,
+ "1630": 5.06799,
+ "1631": 5.06088,
+ "1632": 5.10428,
+ "1633": 4.97515,
+ "1634": 4.9235,
+ "1635": 5.05833,
+ "1636": 4.92289,
+ "1637": 5.24051,
+ "1638": 5.15574,
+ "1639": 4.977,
+ "1640": 5.00918,
+ "1641": 5.12718,
+ "1642": 5.08305,
+ "1643": 5.04894,
+ "1644": 5.1181,
+ "1645": 4.96677,
+ "1646": 5.11931,
+ "1647": 5.03295,
+ "1648": 5.19969,
+ "1649": 4.92396,
+ "1650": 5.05963,
+ "1651": 4.92965,
+ "1652": 5.21121,
+ "1653": 5.15959,
+ "1654": 5.12828,
+ "1655": 5.16263,
+ "1656": 5.34595,
+ "1657": 5.20677,
+ "1658": 5.04112,
+ "1659": 4.9258,
+ "1660": 4.80954,
+ "1661": 5.03086,
+ "1662": 5.14123,
+ "1663": 5.15449,
+ "1664": 4.981,
+ "1665": 5.11714,
+ "1666": 5.10575,
+ "1667": 4.84897,
+ "1668": 5.11513,
+ "1669": 5.06995,
+ "1670": 5.11266,
+ "1671": 5.17201,
+ "1672": 4.77569,
+ "1673": 5.03851,
+ "1674": 4.91569,
+ "1675": 5.05176,
+ "1676": 5.00402,
+ "1677": 4.79944,
+ "1678": 5.02487,
+ "1679": 4.89421,
+ "1680": 5.03847,
+ "1681": 5.06815,
+ "1682": 5.03274,
+ "1683": 4.90688,
+ "1684": 5.06515,
+ "1685": 5.13579,
+ "1686": 5.0732,
+ "1687": 4.97656,
+ "1688": 5.16537,
+ "1689": 5.14707,
+ "1690": 4.99688,
+ "1691": 5.00011,
+ "1692": 4.91822,
+ "1693": 5.01472,
+ "1694": 4.94657,
+ "1695": 4.91341,
+ "1696": 5.08209,
+ "1697": 5.04294,
+ "1698": 4.9511,
+ "1699": 5.00187,
+ "1700": 4.95393,
+ "1701": 5.16563,
+ "1702": 5.07666,
+ "1703": 5.17125,
+ "1704": 5.14332,
+ "1705": 4.96247,
+ "1706": 4.98333,
+ "1707": 4.79005,
+ "1708": 5.03831,
+ "1709": 5.23334,
+ "1710": 5.02934,
+ "1711": 5.19037,
+ "1712": 5.1958,
+ "1713": 5.03582,
+ "1714": 5.04603,
+ "1715": 4.91495,
+ "1716": 4.9332,
+ "1717": 4.86109,
+ "1718": 5.0273,
+ "1719": 5.12334,
+ "1720": 5.02189,
+ "1721": 4.92752,
+ "1722": 5.05412,
+ "1723": 4.93537,
+ "1724": 5.0407,
+ "1725": 5.1914,
+ "1726": 5.06447,
+ "1727": 4.90742,
+ "1728": 5.02116,
+ "1729": 5.04574,
+ "1730": 4.90343,
+ "1731": 4.99945,
+ "1732": 4.92083,
+ "1733": 5.1311,
+ "1734": 4.82837,
+ "1735": 5.20905,
+ "1736": 4.91585,
+ "1737": 4.85859,
+ "1738": 4.97909,
+ "1739": 5.16688,
+ "1740": 4.83514,
+ "1741": 4.77896,
+ "1742": 4.90909,
+ "1743": 5.08523,
+ "1744": 4.9784,
+ "1745": 4.82327,
+ "1746": 4.94833,
+ "1747": 4.87022,
+ "1748": 5.06379,
+ "1749": 4.8705,
+ "1750": 5.01347,
+ "1751": 5.12189,
+ "1752": 4.90364,
+ "1753": 5.09398,
+ "1754": 5.05918,
+ "1755": 4.89649,
+ "1756": 5.02243,
+ "1757": 5.14389,
+ "1758": 4.8716,
+ "1759": 4.94237,
+ "1760": 4.83366,
+ "1761": 5.02233,
+ "1762": 4.81292,
+ "1763": 4.77382,
+ "1764": 4.93787,
+ "1765": 5.14977,
+ "1766": 5.33847,
+ "1767": 5.22339,
+ "1768": 4.95072,
+ "1769": 5.00607,
+ "1770": 4.98077,
+ "1771": 4.96436,
+ "1772": 4.98395,
+ "1773": 4.97312,
+ "1774": 4.86859,
+ "1775": 4.95207,
+ "1776": 4.99761,
+ "1777": 4.94332,
+ "1778": 4.99268,
+ "1779": 5.08376,
+ "1780": 4.83276,
+ "1781": 5.05321,
+ "1782": 4.9968,
+ "1783": 5.01268,
+ "1784": 4.93195,
+ "1785": 5.16736,
+ "1786": 4.81265,
+ "1787": 4.97081,
+ "1788": 4.82725,
+ "1789": 4.88846,
+ "1790": 4.79821,
+ "1791": 4.73741,
+ "1792": 4.87626,
+ "1793": 5.10356,
+ "1794": 4.98084,
+ "1795": 4.96551,
+ "1796": 4.99704,
+ "1797": 4.7903,
+ "1798": 4.76702,
+ "1799": 5.01884,
+ "1800": 4.91364,
+ "1801": 5.04679,
+ "1802": 4.82665,
+ "1803": 4.95171,
+ "1804": 4.88594,
+ "1805": 4.90346,
+ "1806": 4.87351,
+ "1807": 4.92406,
+ "1808": 4.92697,
+ "1809": 5.1451,
+ "1810": 5.09976,
+ "1811": 4.95906,
+ "1812": 4.80139,
+ "1813": 5.09748,
+ "1814": 4.77766,
+ "1815": 4.86134,
+ "1816": 5.05005,
+ "1817": 4.79012,
+ "1818": 4.80376,
+ "1819": 5.02382,
+ "1820": 4.68652,
+ "1821": 5.02661,
+ "1822": 4.66251,
+ "1823": 4.8659,
+ "1824": 4.78635,
+ "1825": 5.06537,
+ "1826": 4.81944,
+ "1827": 4.7895,
+ "1828": 4.94677,
+ "1829": 5.11262,
+ "1830": 4.91236,
+ "1831": 4.89818,
+ "1832": 4.83359,
+ "1833": 4.78363,
+ "1834": 4.9482,
+ "1835": 4.95795,
+ "1836": 4.90747,
+ "1837": 4.67243,
+ "1838": 4.80953,
+ "1839": 4.89546,
+ "1840": 4.90488,
+ "1841": 4.8292,
+ "1842": 4.94678,
+ "1843": 4.70293,
+ "1844": 4.61431,
+ "1845": 5.00086,
+ "1846": 4.74657,
+ "1847": 4.8645,
+ "1848": 4.89695,
+ "1849": 4.85358,
+ "1850": 4.8676,
+ "1851": 5.02236,
+ "1852": 4.97647,
+ "1853": 4.83325,
+ "1854": 4.86791,
+ "1855": 4.8219,
+ "1856": 4.75614,
+ "1857": 4.9619,
+ "1858": 4.96856,
+ "1859": 4.75323,
+ "1860": 4.86592,
+ "1861": 5.20685,
+ "1862": 4.61669,
+ "1863": 4.83385,
+ "1864": 4.7505,
+ "1865": 4.86441,
+ "1866": 4.79455,
+ "1867": 4.99688,
+ "1868": 4.71331,
+ "1869": 4.75634,
+ "1870": 4.93203,
+ "1871": 4.99184,
+ "1872": 4.68332,
+ "1873": 4.69823,
+ "1874": 4.85174,
+ "1875": 4.85999,
+ "1876": 4.7392,
+ "1877": 4.80362,
+ "1878": 4.81239,
+ "1879": 4.82084,
+ "1880": 4.89314,
+ "1881": 4.79389,
+ "1882": 4.79419,
+ "1883": 4.78157,
+ "1884": 4.97086,
+ "1885": 4.91799,
+ "1886": 4.82203,
+ "1887": 4.81334,
+ "1888": 4.97395,
+ "1889": 4.95922,
+ "1890": 4.70676,
+ "1891": 4.65282,
+ "1892": 4.84393,
+ "1893": 4.64594,
+ "1894": 4.90265,
+ "1895": 4.7886,
+ "1896": 4.66112,
+ "1897": 4.78966,
+ "1898": 4.9139,
+ "1899": 4.77532,
+ "1900": 4.91571,
+ "1901": 4.84525,
+ "1902": 4.78411,
+ "1903": 4.75997,
+ "1904": 4.65339,
+ "1905": 4.54188,
+ "1906": 4.81097,
+ "1907": 4.90225,
+ "1908": 5.03012,
+ "1909": 4.88434,
+ "1910": 4.78852,
+ "1911": 4.80477,
+ "1912": 4.64685,
+ "1913": 4.94065,
+ "1914": 4.87965,
+ "1915": 4.85906,
+ "1916": 4.92227,
+ "1917": 4.85425,
+ "1918": 4.87001,
+ "1919": 4.99304,
+ "1920": 4.76319,
+ "1921": 4.88494,
+ "1922": 4.81295,
+ "1923": 4.7592,
+ "1924": 4.82501,
+ "1925": 5.05793,
+ "1926": 4.92996,
+ "1927": 4.92587,
+ "1928": 4.92702,
+ "1929": 4.92705,
+ "1930": 4.91019,
+ "1931": 4.77616,
+ "1932": 4.85963,
+ "1933": 4.83545,
+ "1934": 4.84013,
+ "1935": 5.10729,
+ "1936": 4.88314,
+ "1937": 4.87654,
+ "1938": 4.79463,
+ "1939": 4.71148,
+ "1940": 4.82418,
+ "1941": 4.73372,
+ "1942": 4.87249,
+ "1943": 4.7353,
+ "1944": 4.74198,
+ "1945": 4.6818,
+ "1946": 4.91539,
+ "1947": 4.86756,
+ "1948": 4.59887,
+ "1949": 4.90387,
+ "1950": 4.78785,
+ "1951": 4.95942,
+ "1952": 4.73677,
+ "1953": 4.79496,
+ "1954": 4.73264,
+ "1955": 4.84308,
+ "1956": 4.88233,
+ "1957": 4.73496,
+ "1958": 4.70018,
+ "1959": 4.75966,
+ "1960": 4.76849,
+ "1961": 4.7146,
+ "1962": 4.83392,
+ "1963": 4.82321,
+ "1964": 4.84664,
+ "1965": 4.87523,
+ "1966": 4.78753,
+ "1967": 4.59211,
+ "1968": 4.82724,
+ "1969": 4.59184,
+ "1970": 4.56633,
+ "1971": 4.9072,
+ "1972": 4.90064,
+ "1973": 4.54642,
+ "1974": 4.82423,
+ "1975": 4.82778,
+ "1976": 4.71327,
+ "1977": 4.57967,
+ "1978": 5.0045,
+ "1979": 4.66094,
+ "1980": 4.74256,
+ "1981": 4.86301,
+ "1982": 4.72234,
+ "1983": 4.8786,
+ "1984": 4.64152,
+ "1985": 4.78,
+ "1986": 4.70167,
+ "1987": 4.81036,
+ "1988": 4.8871,
+ "1989": 4.63185,
+ "1990": 4.79636,
+ "1991": 4.69424,
+ "1992": 4.79439,
+ "1993": 4.74063,
+ "1994": 4.84977,
+ "1995": 4.5596,
+ "1996": 4.65161,
+ "1997": 4.80342,
+ "1998": 4.67403,
+ "1999": 4.72284,
+ "2000": 4.61765
+ }
+ },
+ "num-zeros": {
+ "start_step": 1,
+ "end_step": 2000,
+ "step_interval": 1,
+ "values": {
+ "1": 80.0,
+ "2": 70.0,
+ "3": 78.0,
+ "4": 80.0,
+ "5": 75.0,
+ "6": 87.0,
+ "7": 63.0,
+ "8": 77.0,
+ "9": 62.0,
+ "10": 90.0,
+ "11": 74.0,
+ "12": 79.0,
+ "13": 77.0,
+ "14": 83.0,
+ "15": 78.0,
+ "16": 69.0,
+ "17": 64.0,
+ "18": 63.0,
+ "19": 87.0,
+ "20": 90.0,
+ "21": 75.0,
+ "22": 84.0,
+ "23": 81.0,
+ "24": 78.0,
+ "25": 87.0,
+ "26": 69.0,
+ "27": 86.0,
+ "28": 91.0,
+ "29": 94.0,
+ "30": 115.0,
+ "31": 99.0,
+ "32": 109.0,
+ "33": 92.0,
+ "34": 103.0,
+ "35": 118.0,
+ "36": 117.0,
+ "37": 105.0,
+ "38": 129.0,
+ "39": 89.0,
+ "40": 129.0,
+ "41": 114.0,
+ "42": 121.0,
+ "43": 135.0,
+ "44": 128.0,
+ "45": 126.0,
+ "46": 129.0,
+ "47": 133.0,
+ "48": 139.0,
+ "49": 135.0,
+ "50": 157.0,
+ "51": 122.0,
+ "52": 150.0,
+ "53": 108.0,
+ "54": 140.0,
+ "55": 133.0,
+ "56": 156.0,
+ "57": 150.0,
+ "58": 153.0,
+ "59": 135.0,
+ "60": 135.0,
+ "61": 165.0,
+ "62": 145.0,
+ "63": 199.0,
+ "64": 161.0,
+ "65": 162.0,
+ "66": 162.0,
+ "67": 195.0,
+ "68": 140.0,
+ "69": 158.0,
+ "70": 169.0,
+ "71": 188.0,
+ "72": 160.0,
+ "73": 151.0,
+ "74": 154.0,
+ "75": 172.0,
+ "76": 169.0,
+ "77": 165.0,
+ "78": 193.0,
+ "79": 144.0,
+ "80": 173.0,
+ "81": 150.0,
+ "82": 141.0,
+ "83": 186.0,
+ "84": 169.0,
+ "85": 183.0,
+ "86": 196.0,
+ "87": 197.0,
+ "88": 184.0,
+ "89": 169.0,
+ "90": 182.0,
+ "91": 200.0,
+ "92": 179.0,
+ "93": 165.0,
+ "94": 153.0,
+ "95": 176.0,
+ "96": 191.0,
+ "97": 183.0,
+ "98": 199.0,
+ "99": 163.0,
+ "100": 157.0,
+ "101": 144.0,
+ "102": 184.0,
+ "103": 206.0,
+ "104": 171.0,
+ "105": 215.0,
+ "106": 176.0,
+ "107": 172.0,
+ "108": 172.0,
+ "109": 172.0,
+ "110": 216.0,
+ "111": 182.0,
+ "112": 172.0,
+ "113": 167.0,
+ "114": 192.0,
+ "115": 175.0,
+ "116": 181.0,
+ "117": 177.0,
+ "118": 142.0,
+ "119": 212.0,
+ "120": 164.0,
+ "121": 193.0,
+ "122": 160.0,
+ "123": 169.0,
+ "124": 191.0,
+ "125": 214.0,
+ "126": 160.0,
+ "127": 192.0,
+ "128": 160.0,
+ "129": 180.0,
+ "130": 214.0,
+ "131": 219.0,
+ "132": 173.0,
+ "133": 166.0,
+ "134": 171.0,
+ "135": 182.0,
+ "136": 172.0,
+ "137": 176.0,
+ "138": 174.0,
+ "139": 161.0,
+ "140": 178.0,
+ "141": 164.0,
+ "142": 159.0,
+ "143": 192.0,
+ "144": 157.0,
+ "145": 144.0,
+ "146": 149.0,
+ "147": 148.0,
+ "148": 169.0,
+ "149": 143.0,
+ "150": 111.0,
+ "151": 159.0,
+ "152": 115.0,
+ "153": 147.0,
+ "154": 162.0,
+ "155": 185.0,
+ "156": 144.0,
+ "157": 147.0,
+ "158": 130.0,
+ "159": 165.0,
+ "160": 190.0,
+ "161": 141.0,
+ "162": 155.0,
+ "163": 140.0,
+ "164": 174.0,
+ "165": 168.0,
+ "166": 179.0,
+ "167": 147.0,
+ "168": 138.0,
+ "169": 161.0,
+ "170": 159.0,
+ "171": 125.0,
+ "172": 193.0,
+ "173": 172.0,
+ "174": 190.0,
+ "175": 192.0,
+ "176": 146.0,
+ "177": 168.0,
+ "178": 172.0,
+ "179": 177.0,
+ "180": 148.0,
+ "181": 161.0,
+ "182": 213.0,
+ "183": 215.0,
+ "184": 201.0,
+ "185": 154.0,
+ "186": 207.0,
+ "187": 175.0,
+ "188": 183.0,
+ "189": 169.0,
+ "190": 167.0,
+ "191": 163.0,
+ "192": 193.0,
+ "193": 169.0,
+ "194": 161.0,
+ "195": 141.0,
+ "196": 174.0,
+ "197": 188.0,
+ "198": 168.0,
+ "199": 150.0,
+ "200": 187.0,
+ "201": 173.0,
+ "202": 183.0,
+ "203": 142.0,
+ "204": 177.0,
+ "205": 153.0,
+ "206": 198.0,
+ "207": 168.0,
+ "208": 140.0,
+ "209": 179.0,
+ "210": 175.0,
+ "211": 167.0,
+ "212": 194.0,
+ "213": 192.0,
+ "214": 174.0,
+ "215": 188.0,
+ "216": 164.0,
+ "217": 170.0,
+ "218": 171.0,
+ "219": 211.0,
+ "220": 195.0,
+ "221": 181.0,
+ "222": 154.0,
+ "223": 176.0,
+ "224": 173.0,
+ "225": 166.0,
+ "226": 174.0,
+ "227": 211.0,
+ "228": 146.0,
+ "229": 193.0,
+ "230": 149.0,
+ "231": 177.0,
+ "232": 169.0,
+ "233": 193.0,
+ "234": 183.0,
+ "235": 215.0,
+ "236": 200.0,
+ "237": 218.0,
+ "238": 179.0,
+ "239": 139.0,
+ "240": 217.0,
+ "241": 174.0,
+ "242": 193.0,
+ "243": 192.0,
+ "244": 181.0,
+ "245": 206.0,
+ "246": 221.0,
+ "247": 219.0,
+ "248": 175.0,
+ "249": 189.0,
+ "250": 156.0,
+ "251": 205.0,
+ "252": 164.0,
+ "253": 172.0,
+ "254": 184.0,
+ "255": 218.0,
+ "256": 171.0,
+ "257": 208.0,
+ "258": 210.0,
+ "259": 174.0,
+ "260": 199.0,
+ "261": 178.0,
+ "262": 185.0,
+ "263": 181.0,
+ "264": 200.0,
+ "265": 171.0,
+ "266": 149.0,
+ "267": 141.0,
+ "268": 186.0,
+ "269": 198.0,
+ "270": 170.0,
+ "271": 168.0,
+ "272": 210.0,
+ "273": 151.0,
+ "274": 212.0,
+ "275": 182.0,
+ "276": 172.0,
+ "277": 159.0,
+ "278": 169.0,
+ "279": 185.0,
+ "280": 174.0,
+ "281": 160.0,
+ "282": 171.0,
+ "283": 174.0,
+ "284": 183.0,
+ "285": 169.0,
+ "286": 173.0,
+ "287": 203.0,
+ "288": 168.0,
+ "289": 202.0,
+ "290": 157.0,
+ "291": 241.0,
+ "292": 172.0,
+ "293": 209.0,
+ "294": 194.0,
+ "295": 207.0,
+ "296": 217.0,
+ "297": 160.0,
+ "298": 126.0,
+ "299": 170.0,
+ "300": 177.0,
+ "301": 189.0,
+ "302": 209.0,
+ "303": 170.0,
+ "304": 177.0,
+ "305": 148.0,
+ "306": 172.0,
+ "307": 213.0,
+ "308": 184.0,
+ "309": 193.0,
+ "310": 218.0,
+ "311": 159.0,
+ "312": 178.0,
+ "313": 177.0,
+ "314": 199.0,
+ "315": 165.0,
+ "316": 168.0,
+ "317": 185.0,
+ "318": 261.0,
+ "319": 181.0,
+ "320": 196.0,
+ "321": 200.0,
+ "322": 217.0,
+ "323": 198.0,
+ "324": 200.0,
+ "325": 184.0,
+ "326": 283.0,
+ "327": 211.0,
+ "328": 231.0,
+ "329": 189.0,
+ "330": 248.0,
+ "331": 205.0,
+ "332": 208.0,
+ "333": 199.0,
+ "334": 182.0,
+ "335": 202.0,
+ "336": 207.0,
+ "337": 216.0,
+ "338": 231.0,
+ "339": 213.0,
+ "340": 240.0,
+ "341": 207.0,
+ "342": 153.0,
+ "343": 264.0,
+ "344": 214.0,
+ "345": 202.0,
+ "346": 183.0,
+ "347": 194.0,
+ "348": 216.0,
+ "349": 206.0,
+ "350": 218.0,
+ "351": 218.0,
+ "352": 207.0,
+ "353": 225.0,
+ "354": 213.0,
+ "355": 201.0,
+ "356": 227.0,
+ "357": 217.0,
+ "358": 206.0,
+ "359": 186.0,
+ "360": 217.0,
+ "361": 187.0,
+ "362": 256.0,
+ "363": 226.0,
+ "364": 203.0,
+ "365": 200.0,
+ "366": 241.0,
+ "367": 205.0,
+ "368": 192.0,
+ "369": 160.0,
+ "370": 221.0,
+ "371": 212.0,
+ "372": 193.0,
+ "373": 218.0,
+ "374": 164.0,
+ "375": 249.0,
+ "376": 195.0,
+ "377": 197.0,
+ "378": 222.0,
+ "379": 254.0,
+ "380": 210.0,
+ "381": 199.0,
+ "382": 217.0,
+ "383": 208.0,
+ "384": 238.0,
+ "385": 183.0,
+ "386": 221.0,
+ "387": 185.0,
+ "388": 205.0,
+ "389": 185.0,
+ "390": 217.0,
+ "391": 241.0,
+ "392": 212.0,
+ "393": 247.0,
+ "394": 242.0,
+ "395": 247.0,
+ "396": 197.0,
+ "397": 202.0,
+ "398": 191.0,
+ "399": 231.0,
+ "400": 211.0,
+ "401": 200.0,
+ "402": 210.0,
+ "403": 261.0,
+ "404": 211.0,
+ "405": 171.0,
+ "406": 209.0,
+ "407": 200.0,
+ "408": 226.0,
+ "409": 200.0,
+ "410": 220.0,
+ "411": 196.0,
+ "412": 194.0,
+ "413": 168.0,
+ "414": 223.0,
+ "415": 204.0,
+ "416": 225.0,
+ "417": 213.0,
+ "418": 196.0,
+ "419": 203.0,
+ "420": 203.0,
+ "421": 217.0,
+ "422": 200.0,
+ "423": 213.0,
+ "424": 237.0,
+ "425": 239.0,
+ "426": 178.0,
+ "427": 213.0,
+ "428": 196.0,
+ "429": 174.0,
+ "430": 243.0,
+ "431": 169.0,
+ "432": 203.0,
+ "433": 211.0,
+ "434": 194.0,
+ "435": 188.0,
+ "436": 208.0,
+ "437": 170.0,
+ "438": 194.0,
+ "439": 156.0,
+ "440": 199.0,
+ "441": 190.0,
+ "442": 232.0,
+ "443": 225.0,
+ "444": 172.0,
+ "445": 194.0,
+ "446": 221.0,
+ "447": 209.0,
+ "448": 233.0,
+ "449": 257.0,
+ "450": 207.0,
+ "451": 199.0,
+ "452": 177.0,
+ "453": 200.0,
+ "454": 227.0,
+ "455": 263.0,
+ "456": 196.0,
+ "457": 204.0,
+ "458": 169.0,
+ "459": 131.0,
+ "460": 216.0,
+ "461": 223.0,
+ "462": 210.0,
+ "463": 203.0,
+ "464": 208.0,
+ "465": 187.0,
+ "466": 190.0,
+ "467": 192.0,
+ "468": 194.0,
+ "469": 188.0,
+ "470": 193.0,
+ "471": 221.0,
+ "472": 166.0,
+ "473": 191.0,
+ "474": 193.0,
+ "475": 196.0,
+ "476": 192.0,
+ "477": 168.0,
+ "478": 180.0,
+ "479": 176.0,
+ "480": 145.0,
+ "481": 197.0,
+ "482": 167.0,
+ "483": 198.0,
+ "484": 172.0,
+ "485": 175.0,
+ "486": 192.0,
+ "487": 143.0,
+ "488": 182.0,
+ "489": 172.0,
+ "490": 178.0,
+ "491": 175.0,
+ "492": 194.0,
+ "493": 211.0,
+ "494": 159.0,
+ "495": 165.0,
+ "496": 153.0,
+ "497": 145.0,
+ "498": 196.0,
+ "499": 195.0,
+ "500": 165.0,
+ "501": 183.0,
+ "502": 167.0,
+ "503": 175.0,
+ "504": 182.0,
+ "505": 212.0,
+ "506": 177.0,
+ "507": 159.0,
+ "508": 135.0,
+ "509": 195.0,
+ "510": 156.0,
+ "511": 186.0,
+ "512": 177.0,
+ "513": 186.0,
+ "514": 173.0,
+ "515": 190.0,
+ "516": 175.0,
+ "517": 143.0,
+ "518": 169.0,
+ "519": 186.0,
+ "520": 156.0,
+ "521": 146.0,
+ "522": 173.0,
+ "523": 175.0,
+ "524": 172.0,
+ "525": 202.0,
+ "526": 168.0,
+ "527": 178.0,
+ "528": 173.0,
+ "529": 183.0,
+ "530": 168.0,
+ "531": 161.0,
+ "532": 185.0,
+ "533": 172.0,
+ "534": 166.0,
+ "535": 140.0,
+ "536": 164.0,
+ "537": 150.0,
+ "538": 155.0,
+ "539": 125.0,
+ "540": 151.0,
+ "541": 130.0,
+ "542": 153.0,
+ "543": 149.0,
+ "544": 185.0,
+ "545": 132.0,
+ "546": 184.0,
+ "547": 150.0,
+ "548": 155.0,
+ "549": 162.0,
+ "550": 170.0,
+ "551": 144.0,
+ "552": 147.0,
+ "553": 213.0,
+ "554": 182.0,
+ "555": 150.0,
+ "556": 162.0,
+ "557": 154.0,
+ "558": 181.0,
+ "559": 144.0,
+ "560": 194.0,
+ "561": 174.0,
+ "562": 147.0,
+ "563": 125.0,
+ "564": 169.0,
+ "565": 143.0,
+ "566": 136.0,
+ "567": 144.0,
+ "568": 153.0,
+ "569": 167.0,
+ "570": 153.0,
+ "571": 131.0,
+ "572": 143.0,
+ "573": 128.0,
+ "574": 162.0,
+ "575": 133.0,
+ "576": 143.0,
+ "577": 171.0,
+ "578": 167.0,
+ "579": 140.0,
+ "580": 165.0,
+ "581": 164.0,
+ "582": 145.0,
+ "583": 151.0,
+ "584": 146.0,
+ "585": 148.0,
+ "586": 102.0,
+ "587": 147.0,
+ "588": 146.0,
+ "589": 123.0,
+ "590": 146.0,
+ "591": 149.0,
+ "592": 115.0,
+ "593": 166.0,
+ "594": 159.0,
+ "595": 127.0,
+ "596": 113.0,
+ "597": 135.0,
+ "598": 139.0,
+ "599": 157.0,
+ "600": 129.0,
+ "601": 144.0,
+ "602": 129.0,
+ "603": 125.0,
+ "604": 125.0,
+ "605": 139.0,
+ "606": 135.0,
+ "607": 144.0,
+ "608": 149.0,
+ "609": 139.0,
+ "610": 135.0,
+ "611": 148.0,
+ "612": 148.0,
+ "613": 115.0,
+ "614": 150.0,
+ "615": 132.0,
+ "616": 156.0,
+ "617": 120.0,
+ "618": 145.0,
+ "619": 136.0,
+ "620": 170.0,
+ "621": 147.0,
+ "622": 150.0,
+ "623": 119.0,
+ "624": 128.0,
+ "625": 141.0,
+ "626": 122.0,
+ "627": 121.0,
+ "628": 157.0,
+ "629": 126.0,
+ "630": 134.0,
+ "631": 147.0,
+ "632": 146.0,
+ "633": 131.0,
+ "634": 145.0,
+ "635": 174.0,
+ "636": 151.0,
+ "637": 169.0,
+ "638": 128.0,
+ "639": 164.0,
+ "640": 145.0,
+ "641": 136.0,
+ "642": 132.0,
+ "643": 134.0,
+ "644": 124.0,
+ "645": 145.0,
+ "646": 106.0,
+ "647": 123.0,
+ "648": 121.0,
+ "649": 134.0,
+ "650": 153.0,
+ "651": 117.0,
+ "652": 163.0,
+ "653": 155.0,
+ "654": 140.0,
+ "655": 154.0,
+ "656": 124.0,
+ "657": 116.0,
+ "658": 130.0,
+ "659": 114.0,
+ "660": 145.0,
+ "661": 121.0,
+ "662": 143.0,
+ "663": 124.0,
+ "664": 139.0,
+ "665": 138.0,
+ "666": 111.0,
+ "667": 127.0,
+ "668": 144.0,
+ "669": 116.0,
+ "670": 139.0,
+ "671": 132.0,
+ "672": 136.0,
+ "673": 139.0,
+ "674": 119.0,
+ "675": 165.0,
+ "676": 123.0,
+ "677": 127.0,
+ "678": 135.0,
+ "679": 83.0,
+ "680": 139.0,
+ "681": 120.0,
+ "682": 111.0,
+ "683": 119.0,
+ "684": 121.0,
+ "685": 145.0,
+ "686": 127.0,
+ "687": 145.0,
+ "688": 117.0,
+ "689": 119.0,
+ "690": 119.0,
+ "691": 124.0,
+ "692": 118.0,
+ "693": 112.0,
+ "694": 156.0,
+ "695": 114.0,
+ "696": 141.0,
+ "697": 123.0,
+ "698": 130.0,
+ "699": 147.0,
+ "700": 119.0,
+ "701": 139.0,
+ "702": 111.0,
+ "703": 113.0,
+ "704": 118.0,
+ "705": 115.0,
+ "706": 102.0,
+ "707": 121.0,
+ "708": 115.0,
+ "709": 116.0,
+ "710": 95.0,
+ "711": 101.0,
+ "712": 98.0,
+ "713": 117.0,
+ "714": 127.0,
+ "715": 135.0,
+ "716": 124.0,
+ "717": 88.0,
+ "718": 143.0,
+ "719": 114.0,
+ "720": 120.0,
+ "721": 106.0,
+ "722": 117.0,
+ "723": 101.0,
+ "724": 97.0,
+ "725": 106.0,
+ "726": 103.0,
+ "727": 95.0,
+ "728": 123.0,
+ "729": 104.0,
+ "730": 124.0,
+ "731": 111.0,
+ "732": 78.0,
+ "733": 96.0,
+ "734": 129.0,
+ "735": 142.0,
+ "736": 110.0,
+ "737": 132.0,
+ "738": 110.0,
+ "739": 136.0,
+ "740": 106.0,
+ "741": 102.0,
+ "742": 123.0,
+ "743": 133.0,
+ "744": 130.0,
+ "745": 109.0,
+ "746": 122.0,
+ "747": 125.0,
+ "748": 133.0,
+ "749": 114.0,
+ "750": 121.0,
+ "751": 113.0,
+ "752": 111.0,
+ "753": 96.0,
+ "754": 118.0,
+ "755": 87.0,
+ "756": 113.0,
+ "757": 91.0,
+ "758": 105.0,
+ "759": 99.0,
+ "760": 125.0,
+ "761": 106.0,
+ "762": 105.0,
+ "763": 101.0,
+ "764": 109.0,
+ "765": 118.0,
+ "766": 95.0,
+ "767": 133.0,
+ "768": 115.0,
+ "769": 122.0,
+ "770": 106.0,
+ "771": 123.0,
+ "772": 106.0,
+ "773": 136.0,
+ "774": 128.0,
+ "775": 116.0,
+ "776": 112.0,
+ "777": 95.0,
+ "778": 113.0,
+ "779": 119.0,
+ "780": 99.0,
+ "781": 107.0,
+ "782": 80.0,
+ "783": 108.0,
+ "784": 122.0,
+ "785": 111.0,
+ "786": 111.0,
+ "787": 115.0,
+ "788": 116.0,
+ "789": 108.0,
+ "790": 127.0,
+ "791": 83.0,
+ "792": 117.0,
+ "793": 102.0,
+ "794": 106.0,
+ "795": 123.0,
+ "796": 121.0,
+ "797": 124.0,
+ "798": 112.0,
+ "799": 136.0,
+ "800": 99.0,
+ "801": 117.0,
+ "802": 93.0,
+ "803": 166.0,
+ "804": 127.0,
+ "805": 124.0,
+ "806": 97.0,
+ "807": 134.0,
+ "808": 108.0,
+ "809": 121.0,
+ "810": 126.0,
+ "811": 107.0,
+ "812": 116.0,
+ "813": 126.0,
+ "814": 105.0,
+ "815": 98.0,
+ "816": 99.0,
+ "817": 97.0,
+ "818": 97.0,
+ "819": 109.0,
+ "820": 106.0,
+ "821": 88.0,
+ "822": 109.0,
+ "823": 108.0,
+ "824": 127.0,
+ "825": 108.0,
+ "826": 128.0,
+ "827": 134.0,
+ "828": 100.0,
+ "829": 125.0,
+ "830": 113.0,
+ "831": 114.0,
+ "832": 107.0,
+ "833": 113.0,
+ "834": 100.0,
+ "835": 98.0,
+ "836": 123.0,
+ "837": 95.0,
+ "838": 118.0,
+ "839": 96.0,
+ "840": 109.0,
+ "841": 98.0,
+ "842": 114.0,
+ "843": 113.0,
+ "844": 123.0,
+ "845": 108.0,
+ "846": 124.0,
+ "847": 112.0,
+ "848": 115.0,
+ "849": 118.0,
+ "850": 92.0,
+ "851": 145.0,
+ "852": 89.0,
+ "853": 106.0,
+ "854": 101.0,
+ "855": 113.0,
+ "856": 125.0,
+ "857": 105.0,
+ "858": 129.0,
+ "859": 107.0,
+ "860": 118.0,
+ "861": 85.0,
+ "862": 106.0,
+ "863": 95.0,
+ "864": 81.0,
+ "865": 104.0,
+ "866": 105.0,
+ "867": 104.0,
+ "868": 106.0,
+ "869": 109.0,
+ "870": 105.0,
+ "871": 122.0,
+ "872": 114.0,
+ "873": 100.0,
+ "874": 113.0,
+ "875": 108.0,
+ "876": 93.0,
+ "877": 130.0,
+ "878": 110.0,
+ "879": 122.0,
+ "880": 106.0,
+ "881": 103.0,
+ "882": 80.0,
+ "883": 107.0,
+ "884": 115.0,
+ "885": 113.0,
+ "886": 116.0,
+ "887": 131.0,
+ "888": 89.0,
+ "889": 120.0,
+ "890": 110.0,
+ "891": 103.0,
+ "892": 102.0,
+ "893": 106.0,
+ "894": 91.0,
+ "895": 118.0,
+ "896": 110.0,
+ "897": 103.0,
+ "898": 115.0,
+ "899": 119.0,
+ "900": 120.0,
+ "901": 99.0,
+ "902": 100.0,
+ "903": 102.0,
+ "904": 127.0,
+ "905": 105.0,
+ "906": 124.0,
+ "907": 104.0,
+ "908": 117.0,
+ "909": 124.0,
+ "910": 108.0,
+ "911": 102.0,
+ "912": 117.0,
+ "913": 122.0,
+ "914": 130.0,
+ "915": 98.0,
+ "916": 120.0,
+ "917": 113.0,
+ "918": 112.0,
+ "919": 85.0,
+ "920": 110.0,
+ "921": 108.0,
+ "922": 111.0,
+ "923": 116.0,
+ "924": 119.0,
+ "925": 105.0,
+ "926": 128.0,
+ "927": 120.0,
+ "928": 106.0,
+ "929": 94.0,
+ "930": 116.0,
+ "931": 102.0,
+ "932": 123.0,
+ "933": 114.0,
+ "934": 133.0,
+ "935": 86.0,
+ "936": 114.0,
+ "937": 96.0,
+ "938": 118.0,
+ "939": 111.0,
+ "940": 110.0,
+ "941": 102.0,
+ "942": 98.0,
+ "943": 119.0,
+ "944": 107.0,
+ "945": 106.0,
+ "946": 112.0,
+ "947": 93.0,
+ "948": 119.0,
+ "949": 116.0,
+ "950": 124.0,
+ "951": 112.0,
+ "952": 106.0,
+ "953": 97.0,
+ "954": 111.0,
+ "955": 112.0,
+ "956": 87.0,
+ "957": 117.0,
+ "958": 97.0,
+ "959": 91.0,
+ "960": 103.0,
+ "961": 102.0,
+ "962": 103.0,
+ "963": 127.0,
+ "964": 113.0,
+ "965": 120.0,
+ "966": 106.0,
+ "967": 104.0,
+ "968": 119.0,
+ "969": 89.0,
+ "970": 121.0,
+ "971": 115.0,
+ "972": 96.0,
+ "973": 90.0,
+ "974": 113.0,
+ "975": 109.0,
+ "976": 113.0,
+ "977": 85.0,
+ "978": 104.0,
+ "979": 109.0,
+ "980": 100.0,
+ "981": 94.0,
+ "982": 105.0,
+ "983": 84.0,
+ "984": 112.0,
+ "985": 108.0,
+ "986": 92.0,
+ "987": 88.0,
+ "988": 123.0,
+ "989": 106.0,
+ "990": 103.0,
+ "991": 128.0,
+ "992": 104.0,
+ "993": 109.0,
+ "994": 98.0,
+ "995": 104.0,
+ "996": 93.0,
+ "997": 128.0,
+ "998": 121.0,
+ "999": 89.0,
+ "1000": 118.0,
+ "1001": 104.0,
+ "1002": 96.0,
+ "1003": 107.0,
+ "1004": 88.0,
+ "1005": 103.0,
+ "1006": 105.0,
+ "1007": 102.0,
+ "1008": 83.0,
+ "1009": 117.0,
+ "1010": 104.0,
+ "1011": 127.0,
+ "1012": 117.0,
+ "1013": 106.0,
+ "1014": 111.0,
+ "1015": 110.0,
+ "1016": 91.0,
+ "1017": 76.0,
+ "1018": 115.0,
+ "1019": 123.0,
+ "1020": 111.0,
+ "1021": 106.0,
+ "1022": 108.0,
+ "1023": 137.0,
+ "1024": 122.0,
+ "1025": 104.0,
+ "1026": 109.0,
+ "1027": 92.0,
+ "1028": 96.0,
+ "1029": 116.0,
+ "1030": 96.0,
+ "1031": 122.0,
+ "1032": 103.0,
+ "1033": 108.0,
+ "1034": 111.0,
+ "1035": 86.0,
+ "1036": 74.0,
+ "1037": 123.0,
+ "1038": 85.0,
+ "1039": 128.0,
+ "1040": 95.0,
+ "1041": 116.0,
+ "1042": 107.0,
+ "1043": 96.0,
+ "1044": 116.0,
+ "1045": 115.0,
+ "1046": 92.0,
+ "1047": 106.0,
+ "1048": 88.0,
+ "1049": 121.0,
+ "1050": 117.0,
+ "1051": 105.0,
+ "1052": 96.0,
+ "1053": 98.0,
+ "1054": 85.0,
+ "1055": 110.0,
+ "1056": 91.0,
+ "1057": 109.0,
+ "1058": 95.0,
+ "1059": 106.0,
+ "1060": 109.0,
+ "1061": 97.0,
+ "1062": 105.0,
+ "1063": 91.0,
+ "1064": 103.0,
+ "1065": 108.0,
+ "1066": 112.0,
+ "1067": 108.0,
+ "1068": 108.0,
+ "1069": 123.0,
+ "1070": 100.0,
+ "1071": 95.0,
+ "1072": 111.0,
+ "1073": 118.0,
+ "1074": 101.0,
+ "1075": 95.0,
+ "1076": 111.0,
+ "1077": 89.0,
+ "1078": 94.0,
+ "1079": 113.0,
+ "1080": 82.0,
+ "1081": 114.0,
+ "1082": 87.0,
+ "1083": 116.0,
+ "1084": 105.0,
+ "1085": 97.0,
+ "1086": 119.0,
+ "1087": 86.0,
+ "1088": 93.0,
+ "1089": 114.0,
+ "1090": 87.0,
+ "1091": 109.0,
+ "1092": 90.0,
+ "1093": 109.0,
+ "1094": 101.0,
+ "1095": 90.0,
+ "1096": 106.0,
+ "1097": 100.0,
+ "1098": 105.0,
+ "1099": 96.0,
+ "1100": 92.0,
+ "1101": 108.0,
+ "1102": 94.0,
+ "1103": 86.0,
+ "1104": 103.0,
+ "1105": 109.0,
+ "1106": 87.0,
+ "1107": 87.0,
+ "1108": 96.0,
+ "1109": 102.0,
+ "1110": 89.0,
+ "1111": 76.0,
+ "1112": 110.0,
+ "1113": 104.0,
+ "1114": 89.0,
+ "1115": 114.0,
+ "1116": 97.0,
+ "1117": 108.0,
+ "1118": 107.0,
+ "1119": 118.0,
+ "1120": 112.0,
+ "1121": 96.0,
+ "1122": 103.0,
+ "1123": 112.0,
+ "1124": 98.0,
+ "1125": 97.0,
+ "1126": 121.0,
+ "1127": 80.0,
+ "1128": 91.0,
+ "1129": 106.0,
+ "1130": 96.0,
+ "1131": 82.0,
+ "1132": 103.0,
+ "1133": 86.0,
+ "1134": 92.0,
+ "1135": 98.0,
+ "1136": 90.0,
+ "1137": 120.0,
+ "1138": 102.0,
+ "1139": 109.0,
+ "1140": 88.0,
+ "1141": 90.0,
+ "1142": 95.0,
+ "1143": 88.0,
+ "1144": 77.0,
+ "1145": 92.0,
+ "1146": 85.0,
+ "1147": 108.0,
+ "1148": 77.0,
+ "1149": 93.0,
+ "1150": 101.0,
+ "1151": 116.0,
+ "1152": 72.0,
+ "1153": 90.0,
+ "1154": 103.0,
+ "1155": 106.0,
+ "1156": 91.0,
+ "1157": 100.0,
+ "1158": 101.0,
+ "1159": 111.0,
+ "1160": 114.0,
+ "1161": 90.0,
+ "1162": 92.0,
+ "1163": 90.0,
+ "1164": 96.0,
+ "1165": 100.0,
+ "1166": 114.0,
+ "1167": 82.0,
+ "1168": 96.0,
+ "1169": 77.0,
+ "1170": 91.0,
+ "1171": 94.0,
+ "1172": 99.0,
+ "1173": 124.0,
+ "1174": 106.0,
+ "1175": 97.0,
+ "1176": 102.0,
+ "1177": 78.0,
+ "1178": 108.0,
+ "1179": 103.0,
+ "1180": 84.0,
+ "1181": 76.0,
+ "1182": 115.0,
+ "1183": 104.0,
+ "1184": 122.0,
+ "1185": 104.0,
+ "1186": 104.0,
+ "1187": 91.0,
+ "1188": 112.0,
+ "1189": 101.0,
+ "1190": 106.0,
+ "1191": 97.0,
+ "1192": 90.0,
+ "1193": 105.0,
+ "1194": 99.0,
+ "1195": 118.0,
+ "1196": 120.0,
+ "1197": 93.0,
+ "1198": 101.0,
+ "1199": 103.0,
+ "1200": 90.0,
+ "1201": 108.0,
+ "1202": 120.0,
+ "1203": 90.0,
+ "1204": 98.0,
+ "1205": 113.0,
+ "1206": 102.0,
+ "1207": 116.0,
+ "1208": 104.0,
+ "1209": 85.0,
+ "1210": 101.0,
+ "1211": 87.0,
+ "1212": 100.0,
+ "1213": 109.0,
+ "1214": 92.0,
+ "1215": 103.0,
+ "1216": 117.0,
+ "1217": 102.0,
+ "1218": 135.0,
+ "1219": 95.0,
+ "1220": 122.0,
+ "1221": 121.0,
+ "1222": 109.0,
+ "1223": 103.0,
+ "1224": 93.0,
+ "1225": 107.0,
+ "1226": 82.0,
+ "1227": 108.0,
+ "1228": 106.0,
+ "1229": 87.0,
+ "1230": 97.0,
+ "1231": 109.0,
+ "1232": 95.0,
+ "1233": 99.0,
+ "1234": 107.0,
+ "1235": 105.0,
+ "1236": 101.0,
+ "1237": 110.0,
+ "1238": 102.0,
+ "1239": 118.0,
+ "1240": 114.0,
+ "1241": 119.0,
+ "1242": 90.0,
+ "1243": 104.0,
+ "1244": 102.0,
+ "1245": 105.0,
+ "1246": 104.0,
+ "1247": 121.0,
+ "1248": 104.0,
+ "1249": 129.0,
+ "1250": 111.0,
+ "1251": 91.0,
+ "1252": 120.0,
+ "1253": 121.0,
+ "1254": 110.0,
+ "1255": 113.0,
+ "1256": 97.0,
+ "1257": 114.0,
+ "1258": 110.0,
+ "1259": 106.0,
+ "1260": 93.0,
+ "1261": 104.0,
+ "1262": 109.0,
+ "1263": 104.0,
+ "1264": 101.0,
+ "1265": 85.0,
+ "1266": 106.0,
+ "1267": 104.0,
+ "1268": 90.0,
+ "1269": 102.0,
+ "1270": 106.0,
+ "1271": 107.0,
+ "1272": 79.0,
+ "1273": 85.0,
+ "1274": 99.0,
+ "1275": 127.0,
+ "1276": 89.0,
+ "1277": 144.0,
+ "1278": 109.0,
+ "1279": 110.0,
+ "1280": 123.0,
+ "1281": 98.0,
+ "1282": 94.0,
+ "1283": 110.0,
+ "1284": 88.0,
+ "1285": 112.0,
+ "1286": 106.0,
+ "1287": 86.0,
+ "1288": 100.0,
+ "1289": 118.0,
+ "1290": 109.0,
+ "1291": 82.0,
+ "1292": 106.0,
+ "1293": 97.0,
+ "1294": 96.0,
+ "1295": 91.0,
+ "1296": 110.0,
+ "1297": 120.0,
+ "1298": 105.0,
+ "1299": 114.0,
+ "1300": 113.0,
+ "1301": 106.0,
+ "1302": 112.0,
+ "1303": 102.0,
+ "1304": 94.0,
+ "1305": 109.0,
+ "1306": 83.0,
+ "1307": 97.0,
+ "1308": 120.0,
+ "1309": 126.0,
+ "1310": 103.0,
+ "1311": 126.0,
+ "1312": 100.0,
+ "1313": 101.0,
+ "1314": 107.0,
+ "1315": 117.0,
+ "1316": 101.0,
+ "1317": 107.0,
+ "1318": 103.0,
+ "1319": 98.0,
+ "1320": 103.0,
+ "1321": 112.0,
+ "1322": 86.0,
+ "1323": 117.0,
+ "1324": 94.0,
+ "1325": 94.0,
+ "1326": 139.0,
+ "1327": 82.0,
+ "1328": 124.0,
+ "1329": 103.0,
+ "1330": 91.0,
+ "1331": 94.0,
+ "1332": 106.0,
+ "1333": 86.0,
+ "1334": 86.0,
+ "1335": 96.0,
+ "1336": 113.0,
+ "1337": 114.0,
+ "1338": 126.0,
+ "1339": 104.0,
+ "1340": 101.0,
+ "1341": 83.0,
+ "1342": 106.0,
+ "1343": 122.0,
+ "1344": 99.0,
+ "1345": 93.0,
+ "1346": 110.0,
+ "1347": 105.0,
+ "1348": 104.0,
+ "1349": 103.0,
+ "1350": 111.0,
+ "1351": 121.0,
+ "1352": 106.0,
+ "1353": 108.0,
+ "1354": 108.0,
+ "1355": 92.0,
+ "1356": 89.0,
+ "1357": 103.0,
+ "1358": 120.0,
+ "1359": 110.0,
+ "1360": 125.0,
+ "1361": 116.0,
+ "1362": 133.0,
+ "1363": 103.0,
+ "1364": 109.0,
+ "1365": 101.0,
+ "1366": 100.0,
+ "1367": 93.0,
+ "1368": 108.0,
+ "1369": 127.0,
+ "1370": 99.0,
+ "1371": 121.0,
+ "1372": 116.0,
+ "1373": 110.0,
+ "1374": 94.0,
+ "1375": 107.0,
+ "1376": 104.0,
+ "1377": 115.0,
+ "1378": 100.0,
+ "1379": 106.0,
+ "1380": 88.0,
+ "1381": 103.0,
+ "1382": 101.0,
+ "1383": 118.0,
+ "1384": 120.0,
+ "1385": 117.0,
+ "1386": 123.0,
+ "1387": 93.0,
+ "1388": 86.0,
+ "1389": 119.0,
+ "1390": 116.0,
+ "1391": 103.0,
+ "1392": 84.0,
+ "1393": 100.0,
+ "1394": 112.0,
+ "1395": 77.0,
+ "1396": 101.0,
+ "1397": 124.0,
+ "1398": 104.0,
+ "1399": 120.0,
+ "1400": 103.0,
+ "1401": 100.0,
+ "1402": 105.0,
+ "1403": 82.0,
+ "1404": 104.0,
+ "1405": 93.0,
+ "1406": 102.0,
+ "1407": 118.0,
+ "1408": 100.0,
+ "1409": 114.0,
+ "1410": 85.0,
+ "1411": 101.0,
+ "1412": 99.0,
+ "1413": 117.0,
+ "1414": 116.0,
+ "1415": 115.0,
+ "1416": 90.0,
+ "1417": 99.0,
+ "1418": 97.0,
+ "1419": 96.0,
+ "1420": 119.0,
+ "1421": 108.0,
+ "1422": 113.0,
+ "1423": 91.0,
+ "1424": 123.0,
+ "1425": 101.0,
+ "1426": 110.0,
+ "1427": 107.0,
+ "1428": 116.0,
+ "1429": 128.0,
+ "1430": 87.0,
+ "1431": 96.0,
+ "1432": 113.0,
+ "1433": 92.0,
+ "1434": 101.0,
+ "1435": 101.0,
+ "1436": 111.0,
+ "1437": 122.0,
+ "1438": 105.0,
+ "1439": 99.0,
+ "1440": 101.0,
+ "1441": 104.0,
+ "1442": 89.0,
+ "1443": 109.0,
+ "1444": 86.0,
+ "1445": 100.0,
+ "1446": 87.0,
+ "1447": 105.0,
+ "1448": 102.0,
+ "1449": 88.0,
+ "1450": 100.0,
+ "1451": 94.0,
+ "1452": 95.0,
+ "1453": 116.0,
+ "1454": 98.0,
+ "1455": 92.0,
+ "1456": 91.0,
+ "1457": 132.0,
+ "1458": 121.0,
+ "1459": 109.0,
+ "1460": 111.0,
+ "1461": 111.0,
+ "1462": 89.0,
+ "1463": 99.0,
+ "1464": 108.0,
+ "1465": 97.0,
+ "1466": 87.0,
+ "1467": 99.0,
+ "1468": 127.0,
+ "1469": 88.0,
+ "1470": 103.0,
+ "1471": 101.0,
+ "1472": 106.0,
+ "1473": 120.0,
+ "1474": 96.0,
+ "1475": 123.0,
+ "1476": 85.0,
+ "1477": 122.0,
+ "1478": 107.0,
+ "1479": 113.0,
+ "1480": 109.0,
+ "1481": 107.0,
+ "1482": 118.0,
+ "1483": 86.0,
+ "1484": 98.0,
+ "1485": 91.0,
+ "1486": 96.0,
+ "1487": 119.0,
+ "1488": 106.0,
+ "1489": 93.0,
+ "1490": 113.0,
+ "1491": 107.0,
+ "1492": 100.0,
+ "1493": 123.0,
+ "1494": 105.0,
+ "1495": 121.0,
+ "1496": 105.0,
+ "1497": 99.0,
+ "1498": 112.0,
+ "1499": 106.0,
+ "1500": 104.0,
+ "1501": 129.0,
+ "1502": 109.0,
+ "1503": 91.0,
+ "1504": 111.0,
+ "1505": 97.0,
+ "1506": 116.0,
+ "1507": 122.0,
+ "1508": 103.0,
+ "1509": 141.0,
+ "1510": 86.0,
+ "1511": 120.0,
+ "1512": 120.0,
+ "1513": 128.0,
+ "1514": 100.0,
+ "1515": 108.0,
+ "1516": 99.0,
+ "1517": 109.0,
+ "1518": 106.0,
+ "1519": 88.0,
+ "1520": 89.0,
+ "1521": 101.0,
+ "1522": 112.0,
+ "1523": 88.0,
+ "1524": 113.0,
+ "1525": 94.0,
+ "1526": 110.0,
+ "1527": 112.0,
+ "1528": 84.0,
+ "1529": 91.0,
+ "1530": 114.0,
+ "1531": 113.0,
+ "1532": 119.0,
+ "1533": 95.0,
+ "1534": 112.0,
+ "1535": 112.0,
+ "1536": 109.0,
+ "1537": 97.0,
+ "1538": 111.0,
+ "1539": 115.0,
+ "1540": 114.0,
+ "1541": 88.0,
+ "1542": 126.0,
+ "1543": 97.0,
+ "1544": 84.0,
+ "1545": 105.0,
+ "1546": 82.0,
+ "1547": 93.0,
+ "1548": 90.0,
+ "1549": 99.0,
+ "1550": 93.0,
+ "1551": 98.0,
+ "1552": 86.0,
+ "1553": 120.0,
+ "1554": 109.0,
+ "1555": 111.0,
+ "1556": 98.0,
+ "1557": 90.0,
+ "1558": 120.0,
+ "1559": 84.0,
+ "1560": 107.0,
+ "1561": 103.0,
+ "1562": 121.0,
+ "1563": 116.0,
+ "1564": 113.0,
+ "1565": 114.0,
+ "1566": 113.0,
+ "1567": 102.0,
+ "1568": 91.0,
+ "1569": 122.0,
+ "1570": 95.0,
+ "1571": 115.0,
+ "1572": 102.0,
+ "1573": 100.0,
+ "1574": 121.0,
+ "1575": 108.0,
+ "1576": 88.0,
+ "1577": 116.0,
+ "1578": 101.0,
+ "1579": 98.0,
+ "1580": 114.0,
+ "1581": 102.0,
+ "1582": 108.0,
+ "1583": 115.0,
+ "1584": 70.0,
+ "1585": 112.0,
+ "1586": 120.0,
+ "1587": 101.0,
+ "1588": 118.0,
+ "1589": 99.0,
+ "1590": 103.0,
+ "1591": 108.0,
+ "1592": 106.0,
+ "1593": 121.0,
+ "1594": 110.0,
+ "1595": 103.0,
+ "1596": 117.0,
+ "1597": 115.0,
+ "1598": 105.0,
+ "1599": 76.0,
+ "1600": 90.0,
+ "1601": 108.0,
+ "1602": 105.0,
+ "1603": 122.0,
+ "1604": 113.0,
+ "1605": 122.0,
+ "1606": 117.0,
+ "1607": 92.0,
+ "1608": 118.0,
+ "1609": 115.0,
+ "1610": 103.0,
+ "1611": 117.0,
+ "1612": 106.0,
+ "1613": 106.0,
+ "1614": 104.0,
+ "1615": 114.0,
+ "1616": 88.0,
+ "1617": 97.0,
+ "1618": 111.0,
+ "1619": 107.0,
+ "1620": 112.0,
+ "1621": 91.0,
+ "1622": 130.0,
+ "1623": 109.0,
+ "1624": 102.0,
+ "1625": 121.0,
+ "1626": 100.0,
+ "1627": 119.0,
+ "1628": 99.0,
+ "1629": 119.0,
+ "1630": 117.0,
+ "1631": 105.0,
+ "1632": 116.0,
+ "1633": 112.0,
+ "1634": 120.0,
+ "1635": 99.0,
+ "1636": 105.0,
+ "1637": 94.0,
+ "1638": 107.0,
+ "1639": 97.0,
+ "1640": 106.0,
+ "1641": 120.0,
+ "1642": 101.0,
+ "1643": 135.0,
+ "1644": 117.0,
+ "1645": 110.0,
+ "1646": 106.0,
+ "1647": 127.0,
+ "1648": 82.0,
+ "1649": 114.0,
+ "1650": 121.0,
+ "1651": 107.0,
+ "1652": 100.0,
+ "1653": 108.0,
+ "1654": 114.0,
+ "1655": 92.0,
+ "1656": 80.0,
+ "1657": 110.0,
+ "1658": 114.0,
+ "1659": 105.0,
+ "1660": 104.0,
+ "1661": 102.0,
+ "1662": 124.0,
+ "1663": 96.0,
+ "1664": 127.0,
+ "1665": 89.0,
+ "1666": 115.0,
+ "1667": 114.0,
+ "1668": 122.0,
+ "1669": 94.0,
+ "1670": 114.0,
+ "1671": 102.0,
+ "1672": 99.0,
+ "1673": 109.0,
+ "1674": 117.0,
+ "1675": 105.0,
+ "1676": 116.0,
+ "1677": 101.0,
+ "1678": 110.0,
+ "1679": 112.0,
+ "1680": 96.0,
+ "1681": 93.0,
+ "1682": 97.0,
+ "1683": 106.0,
+ "1684": 103.0,
+ "1685": 101.0,
+ "1686": 109.0,
+ "1687": 104.0,
+ "1688": 127.0,
+ "1689": 88.0,
+ "1690": 98.0,
+ "1691": 90.0,
+ "1692": 107.0,
+ "1693": 111.0,
+ "1694": 125.0,
+ "1695": 129.0,
+ "1696": 112.0,
+ "1697": 126.0,
+ "1698": 104.0,
+ "1699": 124.0,
+ "1700": 112.0,
+ "1701": 120.0,
+ "1702": 89.0,
+ "1703": 103.0,
+ "1704": 103.0,
+ "1705": 111.0,
+ "1706": 124.0,
+ "1707": 93.0,
+ "1708": 96.0,
+ "1709": 116.0,
+ "1710": 133.0,
+ "1711": 107.0,
+ "1712": 100.0,
+ "1713": 91.0,
+ "1714": 122.0,
+ "1715": 108.0,
+ "1716": 110.0,
+ "1717": 121.0,
+ "1718": 101.0,
+ "1719": 110.0,
+ "1720": 121.0,
+ "1721": 109.0,
+ "1722": 96.0,
+ "1723": 125.0,
+ "1724": 118.0,
+ "1725": 122.0,
+ "1726": 113.0,
+ "1727": 99.0,
+ "1728": 98.0,
+ "1729": 115.0,
+ "1730": 106.0,
+ "1731": 96.0,
+ "1732": 95.0,
+ "1733": 115.0,
+ "1734": 106.0,
+ "1735": 102.0,
+ "1736": 104.0,
+ "1737": 122.0,
+ "1738": 94.0,
+ "1739": 92.0,
+ "1740": 105.0,
+ "1741": 113.0,
+ "1742": 129.0,
+ "1743": 113.0,
+ "1744": 110.0,
+ "1745": 113.0,
+ "1746": 127.0,
+ "1747": 108.0,
+ "1748": 120.0,
+ "1749": 115.0,
+ "1750": 104.0,
+ "1751": 114.0,
+ "1752": 122.0,
+ "1753": 113.0,
+ "1754": 123.0,
+ "1755": 114.0,
+ "1756": 115.0,
+ "1757": 126.0,
+ "1758": 105.0,
+ "1759": 109.0,
+ "1760": 136.0,
+ "1761": 111.0,
+ "1762": 104.0,
+ "1763": 104.0,
+ "1764": 105.0,
+ "1765": 133.0,
+ "1766": 118.0,
+ "1767": 108.0,
+ "1768": 114.0,
+ "1769": 105.0,
+ "1770": 98.0,
+ "1771": 112.0,
+ "1772": 92.0,
+ "1773": 77.0,
+ "1774": 130.0,
+ "1775": 104.0,
+ "1776": 85.0,
+ "1777": 106.0,
+ "1778": 84.0,
+ "1779": 111.0,
+ "1780": 109.0,
+ "1781": 124.0,
+ "1782": 109.0,
+ "1783": 128.0,
+ "1784": 117.0,
+ "1785": 118.0,
+ "1786": 111.0,
+ "1787": 112.0,
+ "1788": 104.0,
+ "1789": 135.0,
+ "1790": 105.0,
+ "1791": 115.0,
+ "1792": 130.0,
+ "1793": 119.0,
+ "1794": 128.0,
+ "1795": 110.0,
+ "1796": 130.0,
+ "1797": 97.0,
+ "1798": 139.0,
+ "1799": 104.0,
+ "1800": 103.0,
+ "1801": 94.0,
+ "1802": 134.0,
+ "1803": 117.0,
+ "1804": 139.0,
+ "1805": 124.0,
+ "1806": 127.0,
+ "1807": 128.0,
+ "1808": 99.0,
+ "1809": 92.0,
+ "1810": 116.0,
+ "1811": 104.0,
+ "1812": 103.0,
+ "1813": 122.0,
+ "1814": 129.0,
+ "1815": 94.0,
+ "1816": 104.0,
+ "1817": 98.0,
+ "1818": 128.0,
+ "1819": 112.0,
+ "1820": 99.0,
+ "1821": 126.0,
+ "1822": 83.0,
+ "1823": 117.0,
+ "1824": 96.0,
+ "1825": 95.0,
+ "1826": 127.0,
+ "1827": 124.0,
+ "1828": 120.0,
+ "1829": 110.0,
+ "1830": 123.0,
+ "1831": 110.0,
+ "1832": 92.0,
+ "1833": 100.0,
+ "1834": 113.0,
+ "1835": 120.0,
+ "1836": 113.0,
+ "1837": 114.0,
+ "1838": 99.0,
+ "1839": 123.0,
+ "1840": 109.0,
+ "1841": 95.0,
+ "1842": 101.0,
+ "1843": 122.0,
+ "1844": 113.0,
+ "1845": 127.0,
+ "1846": 100.0,
+ "1847": 117.0,
+ "1848": 133.0,
+ "1849": 87.0,
+ "1850": 103.0,
+ "1851": 89.0,
+ "1852": 99.0,
+ "1853": 93.0,
+ "1854": 99.0,
+ "1855": 107.0,
+ "1856": 111.0,
+ "1857": 121.0,
+ "1858": 92.0,
+ "1859": 105.0,
+ "1860": 115.0,
+ "1861": 92.0,
+ "1862": 91.0,
+ "1863": 112.0,
+ "1864": 109.0,
+ "1865": 125.0,
+ "1866": 124.0,
+ "1867": 110.0,
+ "1868": 113.0,
+ "1869": 119.0,
+ "1870": 137.0,
+ "1871": 126.0,
+ "1872": 95.0,
+ "1873": 119.0,
+ "1874": 105.0,
+ "1875": 128.0,
+ "1876": 104.0,
+ "1877": 120.0,
+ "1878": 95.0,
+ "1879": 99.0,
+ "1880": 123.0,
+ "1881": 99.0,
+ "1882": 97.0,
+ "1883": 101.0,
+ "1884": 115.0,
+ "1885": 106.0,
+ "1886": 123.0,
+ "1887": 121.0,
+ "1888": 121.0,
+ "1889": 114.0,
+ "1890": 100.0,
+ "1891": 110.0,
+ "1892": 107.0,
+ "1893": 113.0,
+ "1894": 134.0,
+ "1895": 114.0,
+ "1896": 111.0,
+ "1897": 122.0,
+ "1898": 108.0,
+ "1899": 94.0,
+ "1900": 123.0,
+ "1901": 125.0,
+ "1902": 115.0,
+ "1903": 112.0,
+ "1904": 113.0,
+ "1905": 109.0,
+ "1906": 115.0,
+ "1907": 95.0,
+ "1908": 113.0,
+ "1909": 79.0,
+ "1910": 97.0,
+ "1911": 135.0,
+ "1912": 122.0,
+ "1913": 105.0,
+ "1914": 112.0,
+ "1915": 129.0,
+ "1916": 117.0,
+ "1917": 115.0,
+ "1918": 113.0,
+ "1919": 117.0,
+ "1920": 122.0,
+ "1921": 105.0,
+ "1922": 86.0,
+ "1923": 113.0,
+ "1924": 111.0,
+ "1925": 110.0,
+ "1926": 112.0,
+ "1927": 103.0,
+ "1928": 108.0,
+ "1929": 113.0,
+ "1930": 121.0,
+ "1931": 111.0,
+ "1932": 106.0,
+ "1933": 114.0,
+ "1934": 117.0,
+ "1935": 93.0,
+ "1936": 109.0,
+ "1937": 121.0,
+ "1938": 108.0,
+ "1939": 132.0,
+ "1940": 127.0,
+ "1941": 126.0,
+ "1942": 101.0,
+ "1943": 120.0,
+ "1944": 87.0,
+ "1945": 114.0,
+ "1946": 105.0,
+ "1947": 109.0,
+ "1948": 109.0,
+ "1949": 106.0,
+ "1950": 111.0,
+ "1951": 120.0,
+ "1952": 104.0,
+ "1953": 113.0,
+ "1954": 116.0,
+ "1955": 131.0,
+ "1956": 91.0,
+ "1957": 118.0,
+ "1958": 139.0,
+ "1959": 114.0,
+ "1960": 96.0,
+ "1961": 109.0,
+ "1962": 113.0,
+ "1963": 125.0,
+ "1964": 112.0,
+ "1965": 108.0,
+ "1966": 130.0,
+ "1967": 120.0,
+ "1968": 110.0,
+ "1969": 96.0,
+ "1970": 110.0,
+ "1971": 121.0,
+ "1972": 104.0,
+ "1973": 103.0,
+ "1974": 110.0,
+ "1975": 101.0,
+ "1976": 144.0,
+ "1977": 122.0,
+ "1978": 118.0,
+ "1979": 121.0,
+ "1980": 115.0,
+ "1981": 114.0,
+ "1982": 136.0,
+ "1983": 123.0,
+ "1984": 112.0,
+ "1985": 116.0,
+ "1986": 104.0,
+ "1987": 133.0,
+ "1988": 107.0,
+ "1989": 100.0,
+ "1990": 112.0,
+ "1991": 119.0,
+ "1992": 103.0,
+ "1993": 133.0,
+ "1994": 123.0,
+ "1995": 118.0,
+ "1996": 109.0,
+ "1997": 119.0,
+ "1998": 107.0,
+ "1999": 119.0,
+ "2000": 134.0
+ }
+ },
+ "mem-allocated-bytes": {
+ "start_step": 1,
+ "end_step": 2000,
+ "step_interval": 1,
+ "values": {
+ "1": 442918400.0,
+ "2": 442918400.0,
+ "3": 442918400.0,
+ "4": 442918400.0,
+ "5": 442918400.0,
+ "6": 442918400.0,
+ "7": 442918400.0,
+ "8": 442918400.0,
+ "9": 442918400.0,
+ "10": 442918400.0,
+ "11": 442918400.0,
+ "12": 442918400.0,
+ "13": 442918400.0,
+ "14": 442918400.0,
+ "15": 442918400.0,
+ "16": 442918400.0,
+ "17": 442918400.0,
+ "18": 442918400.0,
+ "19": 442918400.0,
+ "20": 442918400.0,
+ "21": 442918400.0,
+ "22": 442918400.0,
+ "23": 442918400.0,
+ "24": 442918400.0,
+ "25": 442918400.0,
+ "26": 442918400.0,
+ "27": 442918400.0,
+ "28": 442918400.0,
+ "29": 442918400.0,
+ "30": 442918400.0,
+ "31": 442918400.0,
+ "32": 442918400.0,
+ "33": 442918400.0,
+ "34": 442918400.0,
+ "35": 442918400.0,
+ "36": 442918400.0,
+ "37": 442918400.0,
+ "38": 442918400.0,
+ "39": 442918400.0,
+ "40": 442918400.0,
+ "41": 442918400.0,
+ "42": 442918400.0,
+ "43": 442918400.0,
+ "44": 442918400.0,
+ "45": 442918400.0,
+ "46": 442918400.0,
+ "47": 442918400.0,
+ "48": 442918400.0,
+ "49": 442918400.0,
+ "50": 442918400.0,
+ "51": 442918400.0,
+ "52": 442918400.0,
+ "53": 442918400.0,
+ "54": 442918400.0,
+ "55": 442918400.0,
+ "56": 442918400.0,
+ "57": 442918400.0,
+ "58": 442918400.0,
+ "59": 442918400.0,
+ "60": 442918400.0,
+ "61": 442918400.0,
+ "62": 442918400.0,
+ "63": 442918400.0,
+ "64": 442918400.0,
+ "65": 442918400.0,
+ "66": 442918400.0,
+ "67": 442918400.0,
+ "68": 442918400.0,
+ "69": 442918400.0,
+ "70": 442918400.0,
+ "71": 442918400.0,
+ "72": 442918400.0,
+ "73": 442918400.0,
+ "74": 442918400.0,
+ "75": 442918400.0,
+ "76": 442918400.0,
+ "77": 442918400.0,
+ "78": 442918400.0,
+ "79": 442918400.0,
+ "80": 442918400.0,
+ "81": 442918400.0,
+ "82": 442918400.0,
+ "83": 442918400.0,
+ "84": 442918400.0,
+ "85": 442918400.0,
+ "86": 442918400.0,
+ "87": 442918400.0,
+ "88": 442918400.0,
+ "89": 442918400.0,
+ "90": 442918400.0,
+ "91": 442918400.0,
+ "92": 442918400.0,
+ "93": 442918400.0,
+ "94": 442918400.0,
+ "95": 442918400.0,
+ "96": 442918400.0,
+ "97": 442918400.0,
+ "98": 442918400.0,
+ "99": 442918400.0,
+ "100": 442918400.0,
+ "101": 442918400.0,
+ "102": 442918400.0,
+ "103": 442918400.0,
+ "104": 442918400.0,
+ "105": 442918400.0,
+ "106": 442918400.0,
+ "107": 442918400.0,
+ "108": 442918400.0,
+ "109": 442918400.0,
+ "110": 442918400.0,
+ "111": 442918400.0,
+ "112": 442918400.0,
+ "113": 442918400.0,
+ "114": 442918400.0,
+ "115": 442918400.0,
+ "116": 442918400.0,
+ "117": 442918400.0,
+ "118": 442918400.0,
+ "119": 442918400.0,
+ "120": 442918400.0,
+ "121": 442918400.0,
+ "122": 442918400.0,
+ "123": 442918400.0,
+ "124": 442918400.0,
+ "125": 442918400.0,
+ "126": 442918400.0,
+ "127": 442918400.0,
+ "128": 442918400.0,
+ "129": 442918400.0,
+ "130": 442918400.0,
+ "131": 442918400.0,
+ "132": 442918400.0,
+ "133": 442918400.0,
+ "134": 442918400.0,
+ "135": 442918400.0,
+ "136": 442918400.0,
+ "137": 442918400.0,
+ "138": 442918400.0,
+ "139": 442918400.0,
+ "140": 442918400.0,
+ "141": 442918400.0,
+ "142": 442918400.0,
+ "143": 442918400.0,
+ "144": 442918400.0,
+ "145": 442918400.0,
+ "146": 442918400.0,
+ "147": 442918400.0,
+ "148": 442918400.0,
+ "149": 442918400.0,
+ "150": 442918400.0,
+ "151": 442918400.0,
+ "152": 442918400.0,
+ "153": 442918400.0,
+ "154": 442918400.0,
+ "155": 442918400.0,
+ "156": 442918400.0,
+ "157": 442918400.0,
+ "158": 442918400.0,
+ "159": 442918400.0,
+ "160": 442918400.0,
+ "161": 442918400.0,
+ "162": 442918400.0,
+ "163": 442918400.0,
+ "164": 442918400.0,
+ "165": 442918400.0,
+ "166": 442918400.0,
+ "167": 442918400.0,
+ "168": 442918400.0,
+ "169": 442918400.0,
+ "170": 442918400.0,
+ "171": 442918400.0,
+ "172": 442918400.0,
+ "173": 442918400.0,
+ "174": 442918400.0,
+ "175": 442918400.0,
+ "176": 442918400.0,
+ "177": 442918400.0,
+ "178": 442918400.0,
+ "179": 442918400.0,
+ "180": 442918400.0,
+ "181": 442918400.0,
+ "182": 442918400.0,
+ "183": 442918400.0,
+ "184": 442918400.0,
+ "185": 442918400.0,
+ "186": 442918400.0,
+ "187": 442918400.0,
+ "188": 442918400.0,
+ "189": 442918400.0,
+ "190": 442918400.0,
+ "191": 442918400.0,
+ "192": 442918400.0,
+ "193": 442918400.0,
+ "194": 442918400.0,
+ "195": 442918400.0,
+ "196": 442918400.0,
+ "197": 442918400.0,
+ "198": 442918400.0,
+ "199": 442918400.0,
+ "200": 442918400.0,
+ "201": 442918400.0,
+ "202": 442918400.0,
+ "203": 442918400.0,
+ "204": 442918400.0,
+ "205": 442918400.0,
+ "206": 442918400.0,
+ "207": 442918400.0,
+ "208": 442918400.0,
+ "209": 442918400.0,
+ "210": 442918400.0,
+ "211": 442918400.0,
+ "212": 442918400.0,
+ "213": 442918400.0,
+ "214": 442918400.0,
+ "215": 442918400.0,
+ "216": 442918400.0,
+ "217": 442918400.0,
+ "218": 442918400.0,
+ "219": 442918400.0,
+ "220": 442918400.0,
+ "221": 442918400.0,
+ "222": 442918400.0,
+ "223": 442918400.0,
+ "224": 442918400.0,
+ "225": 442918400.0,
+ "226": 442918400.0,
+ "227": 442918400.0,
+ "228": 442918400.0,
+ "229": 442918400.0,
+ "230": 442918400.0,
+ "231": 442918400.0,
+ "232": 442918400.0,
+ "233": 442918400.0,
+ "234": 442918400.0,
+ "235": 442918400.0,
+ "236": 442918400.0,
+ "237": 442918400.0,
+ "238": 442918400.0,
+ "239": 442918400.0,
+ "240": 442918400.0,
+ "241": 442918400.0,
+ "242": 442918400.0,
+ "243": 442918400.0,
+ "244": 442918400.0,
+ "245": 442918400.0,
+ "246": 442918400.0,
+ "247": 442918400.0,
+ "248": 442918400.0,
+ "249": 442918400.0,
+ "250": 442918400.0,
+ "251": 442918400.0,
+ "252": 442918400.0,
+ "253": 442918400.0,
+ "254": 442918400.0,
+ "255": 442918400.0,
+ "256": 442918400.0,
+ "257": 442918400.0,
+ "258": 442918400.0,
+ "259": 442918400.0,
+ "260": 442918400.0,
+ "261": 442918400.0,
+ "262": 442918400.0,
+ "263": 442918400.0,
+ "264": 442918400.0,
+ "265": 442918400.0,
+ "266": 442918400.0,
+ "267": 442918400.0,
+ "268": 442918400.0,
+ "269": 442918400.0,
+ "270": 442918400.0,
+ "271": 442918400.0,
+ "272": 442918400.0,
+ "273": 442918400.0,
+ "274": 442918400.0,
+ "275": 442918400.0,
+ "276": 442918400.0,
+ "277": 442918400.0,
+ "278": 442918400.0,
+ "279": 442918400.0,
+ "280": 442918400.0,
+ "281": 442918400.0,
+ "282": 442918400.0,
+ "283": 442918400.0,
+ "284": 442918400.0,
+ "285": 442918400.0,
+ "286": 442918400.0,
+ "287": 442918400.0,
+ "288": 442918400.0,
+ "289": 442918400.0,
+ "290": 442918400.0,
+ "291": 442918400.0,
+ "292": 442918400.0,
+ "293": 442918400.0,
+ "294": 442918400.0,
+ "295": 442918400.0,
+ "296": 442918400.0,
+ "297": 442918400.0,
+ "298": 442918400.0,
+ "299": 442918400.0,
+ "300": 442918400.0,
+ "301": 442918400.0,
+ "302": 442918400.0,
+ "303": 442918400.0,
+ "304": 442918400.0,
+ "305": 442918400.0,
+ "306": 442918400.0,
+ "307": 442918400.0,
+ "308": 442918400.0,
+ "309": 442918400.0,
+ "310": 442918400.0,
+ "311": 442918400.0,
+ "312": 442918400.0,
+ "313": 442918400.0,
+ "314": 442918400.0,
+ "315": 442918400.0,
+ "316": 442918400.0,
+ "317": 442918400.0,
+ "318": 442918400.0,
+ "319": 442918400.0,
+ "320": 442918400.0,
+ "321": 442918400.0,
+ "322": 442918400.0,
+ "323": 442918400.0,
+ "324": 442918400.0,
+ "325": 442918400.0,
+ "326": 442918400.0,
+ "327": 442918400.0,
+ "328": 442918400.0,
+ "329": 442918400.0,
+ "330": 442918400.0,
+ "331": 442918400.0,
+ "332": 442918400.0,
+ "333": 442918400.0,
+ "334": 442918400.0,
+ "335": 442918400.0,
+ "336": 442918400.0,
+ "337": 442918400.0,
+ "338": 442918400.0,
+ "339": 442918400.0,
+ "340": 442918400.0,
+ "341": 442918400.0,
+ "342": 442918400.0,
+ "343": 442918400.0,
+ "344": 442918400.0,
+ "345": 442918400.0,
+ "346": 442918400.0,
+ "347": 442918400.0,
+ "348": 442918400.0,
+ "349": 442918400.0,
+ "350": 442918400.0,
+ "351": 442918400.0,
+ "352": 442918400.0,
+ "353": 442918400.0,
+ "354": 442918400.0,
+ "355": 442918400.0,
+ "356": 442918400.0,
+ "357": 442918400.0,
+ "358": 442918400.0,
+ "359": 442918400.0,
+ "360": 442918400.0,
+ "361": 442918400.0,
+ "362": 442918400.0,
+ "363": 442918400.0,
+ "364": 442918400.0,
+ "365": 442918400.0,
+ "366": 442918400.0,
+ "367": 442918400.0,
+ "368": 442918400.0,
+ "369": 442918400.0,
+ "370": 442918400.0,
+ "371": 442918400.0,
+ "372": 442918400.0,
+ "373": 442918400.0,
+ "374": 442918400.0,
+ "375": 442918400.0,
+ "376": 442918400.0,
+ "377": 442918400.0,
+ "378": 442918400.0,
+ "379": 442918400.0,
+ "380": 442918400.0,
+ "381": 442918400.0,
+ "382": 442918400.0,
+ "383": 442918400.0,
+ "384": 442918400.0,
+ "385": 442918400.0,
+ "386": 442918400.0,
+ "387": 442918400.0,
+ "388": 442918400.0,
+ "389": 442918400.0,
+ "390": 442918400.0,
+ "391": 442918400.0,
+ "392": 442918400.0,
+ "393": 442918400.0,
+ "394": 442918400.0,
+ "395": 442918400.0,
+ "396": 442918400.0,
+ "397": 442918400.0,
+ "398": 442918400.0,
+ "399": 442918400.0,
+ "400": 442918400.0,
+ "401": 442918400.0,
+ "402": 442918400.0,
+ "403": 442918400.0,
+ "404": 442918400.0,
+ "405": 442918400.0,
+ "406": 442918400.0,
+ "407": 442918400.0,
+ "408": 442918400.0,
+ "409": 442918400.0,
+ "410": 442918400.0,
+ "411": 442918400.0,
+ "412": 442918400.0,
+ "413": 442918400.0,
+ "414": 442918400.0,
+ "415": 442918400.0,
+ "416": 442918400.0,
+ "417": 442918400.0,
+ "418": 442918400.0,
+ "419": 442918400.0,
+ "420": 442918400.0,
+ "421": 442918400.0,
+ "422": 442918400.0,
+ "423": 442918400.0,
+ "424": 442918400.0,
+ "425": 442918400.0,
+ "426": 442918400.0,
+ "427": 442918400.0,
+ "428": 442918400.0,
+ "429": 442918400.0,
+ "430": 442918400.0,
+ "431": 442918400.0,
+ "432": 442918400.0,
+ "433": 442918400.0,
+ "434": 442918400.0,
+ "435": 442918400.0,
+ "436": 442918400.0,
+ "437": 442918400.0,
+ "438": 442918400.0,
+ "439": 442918400.0,
+ "440": 442918400.0,
+ "441": 442918400.0,
+ "442": 442918400.0,
+ "443": 442918400.0,
+ "444": 442918400.0,
+ "445": 442918400.0,
+ "446": 442918400.0,
+ "447": 442918400.0,
+ "448": 442918400.0,
+ "449": 442918400.0,
+ "450": 442918400.0,
+ "451": 442918400.0,
+ "452": 442918400.0,
+ "453": 442918400.0,
+ "454": 442918400.0,
+ "455": 442918400.0,
+ "456": 442918400.0,
+ "457": 442918400.0,
+ "458": 442918400.0,
+ "459": 442918400.0,
+ "460": 442918400.0,
+ "461": 442918400.0,
+ "462": 442918400.0,
+ "463": 442918400.0,
+ "464": 442918400.0,
+ "465": 442918400.0,
+ "466": 442918400.0,
+ "467": 442918400.0,
+ "468": 442918400.0,
+ "469": 442918400.0,
+ "470": 442918400.0,
+ "471": 442918400.0,
+ "472": 442918400.0,
+ "473": 442918400.0,
+ "474": 442918400.0,
+ "475": 442918400.0,
+ "476": 442918400.0,
+ "477": 442918400.0,
+ "478": 442918400.0,
+ "479": 442918400.0,
+ "480": 442918400.0,
+ "481": 442918400.0,
+ "482": 442918400.0,
+ "483": 442918400.0,
+ "484": 442918400.0,
+ "485": 442918400.0,
+ "486": 442918400.0,
+ "487": 442918400.0,
+ "488": 442918400.0,
+ "489": 442918400.0,
+ "490": 442918400.0,
+ "491": 442918400.0,
+ "492": 442918400.0,
+ "493": 442918400.0,
+ "494": 442918400.0,
+ "495": 442918400.0,
+ "496": 442918400.0,
+ "497": 442918400.0,
+ "498": 442918400.0,
+ "499": 442918400.0,
+ "500": 442918400.0,
+ "501": 442918400.0,
+ "502": 442918400.0,
+ "503": 442918400.0,
+ "504": 442918400.0,
+ "505": 442918400.0,
+ "506": 442918400.0,
+ "507": 442918400.0,
+ "508": 442918400.0,
+ "509": 442918400.0,
+ "510": 442918400.0,
+ "511": 442918400.0,
+ "512": 442918400.0,
+ "513": 442918400.0,
+ "514": 442918400.0,
+ "515": 442918400.0,
+ "516": 442918400.0,
+ "517": 442918400.0,
+ "518": 442918400.0,
+ "519": 442918400.0,
+ "520": 442918400.0,
+ "521": 442918400.0,
+ "522": 442918400.0,
+ "523": 442918400.0,
+ "524": 442918400.0,
+ "525": 442918400.0,
+ "526": 442918400.0,
+ "527": 442918400.0,
+ "528": 442918400.0,
+ "529": 442918400.0,
+ "530": 442918400.0,
+ "531": 442918400.0,
+ "532": 442918400.0,
+ "533": 442918400.0,
+ "534": 442918400.0,
+ "535": 442918400.0,
+ "536": 442918400.0,
+ "537": 442918400.0,
+ "538": 442918400.0,
+ "539": 442918400.0,
+ "540": 442918400.0,
+ "541": 442918400.0,
+ "542": 442918400.0,
+ "543": 442918400.0,
+ "544": 442918400.0,
+ "545": 442918400.0,
+ "546": 442918400.0,
+ "547": 442918400.0,
+ "548": 442918400.0,
+ "549": 442918400.0,
+ "550": 442918400.0,
+ "551": 442918400.0,
+ "552": 442918400.0,
+ "553": 442918400.0,
+ "554": 442918400.0,
+ "555": 442918400.0,
+ "556": 442918400.0,
+ "557": 442918400.0,
+ "558": 442918400.0,
+ "559": 442918400.0,
+ "560": 442918400.0,
+ "561": 442918400.0,
+ "562": 442918400.0,
+ "563": 442918400.0,
+ "564": 442918400.0,
+ "565": 442918400.0,
+ "566": 442918400.0,
+ "567": 442918400.0,
+ "568": 442918400.0,
+ "569": 442918400.0,
+ "570": 442918400.0,
+ "571": 442918400.0,
+ "572": 442918400.0,
+ "573": 442918400.0,
+ "574": 442918400.0,
+ "575": 442918400.0,
+ "576": 442918400.0,
+ "577": 442918400.0,
+ "578": 442918400.0,
+ "579": 442918400.0,
+ "580": 442918400.0,
+ "581": 442918400.0,
+ "582": 442918400.0,
+ "583": 442918400.0,
+ "584": 442918400.0,
+ "585": 442918400.0,
+ "586": 442918400.0,
+ "587": 442918400.0,
+ "588": 442918400.0,
+ "589": 442918400.0,
+ "590": 442918400.0,
+ "591": 442918400.0,
+ "592": 442918400.0,
+ "593": 442918400.0,
+ "594": 442918400.0,
+ "595": 442918400.0,
+ "596": 442918400.0,
+ "597": 442918400.0,
+ "598": 442918400.0,
+ "599": 442918400.0,
+ "600": 442918400.0,
+ "601": 442918400.0,
+ "602": 442918400.0,
+ "603": 442918400.0,
+ "604": 442918400.0,
+ "605": 442918400.0,
+ "606": 442918400.0,
+ "607": 442918400.0,
+ "608": 442918400.0,
+ "609": 442918400.0,
+ "610": 442918400.0,
+ "611": 442918400.0,
+ "612": 442918400.0,
+ "613": 442918400.0,
+ "614": 442918400.0,
+ "615": 442918400.0,
+ "616": 442918400.0,
+ "617": 442918400.0,
+ "618": 442918400.0,
+ "619": 442918400.0,
+ "620": 442918400.0,
+ "621": 442918400.0,
+ "622": 442918400.0,
+ "623": 442918400.0,
+ "624": 442918400.0,
+ "625": 442918400.0,
+ "626": 442918400.0,
+ "627": 442918400.0,
+ "628": 442918400.0,
+ "629": 442918400.0,
+ "630": 442918400.0,
+ "631": 442918400.0,
+ "632": 442918400.0,
+ "633": 442918400.0,
+ "634": 442918400.0,
+ "635": 442918400.0,
+ "636": 442918400.0,
+ "637": 442918400.0,
+ "638": 442918400.0,
+ "639": 442918400.0,
+ "640": 442918400.0,
+ "641": 442918400.0,
+ "642": 442918400.0,
+ "643": 442918400.0,
+ "644": 442918400.0,
+ "645": 442918400.0,
+ "646": 442918400.0,
+ "647": 442918400.0,
+ "648": 442918400.0,
+ "649": 442918400.0,
+ "650": 442918400.0,
+ "651": 442918400.0,
+ "652": 442918400.0,
+ "653": 442918400.0,
+ "654": 442918400.0,
+ "655": 442918400.0,
+ "656": 442918400.0,
+ "657": 442918400.0,
+ "658": 442918400.0,
+ "659": 442918400.0,
+ "660": 442918400.0,
+ "661": 442918400.0,
+ "662": 442918400.0,
+ "663": 442918400.0,
+ "664": 442918400.0,
+ "665": 442918400.0,
+ "666": 442918400.0,
+ "667": 442918400.0,
+ "668": 442918400.0,
+ "669": 442918400.0,
+ "670": 442918400.0,
+ "671": 442918400.0,
+ "672": 442918400.0,
+ "673": 442918400.0,
+ "674": 442918400.0,
+ "675": 442918400.0,
+ "676": 442918400.0,
+ "677": 442918400.0,
+ "678": 442918400.0,
+ "679": 442918400.0,
+ "680": 442918400.0,
+ "681": 442918400.0,
+ "682": 442918400.0,
+ "683": 442918400.0,
+ "684": 442918400.0,
+ "685": 442918400.0,
+ "686": 442918400.0,
+ "687": 442918400.0,
+ "688": 442918400.0,
+ "689": 442918400.0,
+ "690": 442918400.0,
+ "691": 442918400.0,
+ "692": 442918400.0,
+ "693": 442918400.0,
+ "694": 442918400.0,
+ "695": 442918400.0,
+ "696": 442918400.0,
+ "697": 442918400.0,
+ "698": 442918400.0,
+ "699": 442918400.0,
+ "700": 442918400.0,
+ "701": 442918400.0,
+ "702": 442918400.0,
+ "703": 442918400.0,
+ "704": 442918400.0,
+ "705": 442918400.0,
+ "706": 442918400.0,
+ "707": 442918400.0,
+ "708": 442918400.0,
+ "709": 442918400.0,
+ "710": 442918400.0,
+ "711": 442918400.0,
+ "712": 442918400.0,
+ "713": 442918400.0,
+ "714": 442918400.0,
+ "715": 442918400.0,
+ "716": 442918400.0,
+ "717": 442918400.0,
+ "718": 442918400.0,
+ "719": 442918400.0,
+ "720": 442918400.0,
+ "721": 442918400.0,
+ "722": 442918400.0,
+ "723": 442918400.0,
+ "724": 442918400.0,
+ "725": 442918400.0,
+ "726": 442918400.0,
+ "727": 442918400.0,
+ "728": 442918400.0,
+ "729": 442918400.0,
+ "730": 442918400.0,
+ "731": 442918400.0,
+ "732": 442918400.0,
+ "733": 442918400.0,
+ "734": 442918400.0,
+ "735": 442918400.0,
+ "736": 442918400.0,
+ "737": 442918400.0,
+ "738": 442918400.0,
+ "739": 442918400.0,
+ "740": 442918400.0,
+ "741": 442918400.0,
+ "742": 442918400.0,
+ "743": 442918400.0,
+ "744": 442918400.0,
+ "745": 442918400.0,
+ "746": 442918400.0,
+ "747": 442918400.0,
+ "748": 442918400.0,
+ "749": 442918400.0,
+ "750": 442918400.0,
+ "751": 442918400.0,
+ "752": 442918400.0,
+ "753": 442918400.0,
+ "754": 442918400.0,
+ "755": 442918400.0,
+ "756": 442918400.0,
+ "757": 442918400.0,
+ "758": 442918400.0,
+ "759": 442918400.0,
+ "760": 442918400.0,
+ "761": 442918400.0,
+ "762": 442918400.0,
+ "763": 442918400.0,
+ "764": 442918400.0,
+ "765": 442918400.0,
+ "766": 442918400.0,
+ "767": 442918400.0,
+ "768": 442918400.0,
+ "769": 442918400.0,
+ "770": 442918400.0,
+ "771": 442918400.0,
+ "772": 442918400.0,
+ "773": 442918400.0,
+ "774": 442918400.0,
+ "775": 442918400.0,
+ "776": 442918400.0,
+ "777": 442918400.0,
+ "778": 442918400.0,
+ "779": 442918400.0,
+ "780": 442918400.0,
+ "781": 442918400.0,
+ "782": 442918400.0,
+ "783": 442918400.0,
+ "784": 442918400.0,
+ "785": 442918400.0,
+ "786": 442918400.0,
+ "787": 442918400.0,
+ "788": 442918400.0,
+ "789": 442918400.0,
+ "790": 442918400.0,
+ "791": 442918400.0,
+ "792": 442918400.0,
+ "793": 442918400.0,
+ "794": 442918400.0,
+ "795": 442918400.0,
+ "796": 442918400.0,
+ "797": 442918400.0,
+ "798": 442918400.0,
+ "799": 442918400.0,
+ "800": 442918400.0,
+ "801": 442918400.0,
+ "802": 442918400.0,
+ "803": 442918400.0,
+ "804": 442918400.0,
+ "805": 442918400.0,
+ "806": 442918400.0,
+ "807": 442918400.0,
+ "808": 442918400.0,
+ "809": 442918400.0,
+ "810": 442918400.0,
+ "811": 442918400.0,
+ "812": 442918400.0,
+ "813": 442918400.0,
+ "814": 442918400.0,
+ "815": 442918400.0,
+ "816": 442918400.0,
+ "817": 442918400.0,
+ "818": 442918400.0,
+ "819": 442918400.0,
+ "820": 442918400.0,
+ "821": 442918400.0,
+ "822": 442918400.0,
+ "823": 442918400.0,
+ "824": 442918400.0,
+ "825": 442918400.0,
+ "826": 442918400.0,
+ "827": 442918400.0,
+ "828": 442918400.0,
+ "829": 442918400.0,
+ "830": 442918400.0,
+ "831": 442918400.0,
+ "832": 442918400.0,
+ "833": 442918400.0,
+ "834": 442918400.0,
+ "835": 442918400.0,
+ "836": 442918400.0,
+ "837": 442918400.0,
+ "838": 442918400.0,
+ "839": 442918400.0,
+ "840": 442918400.0,
+ "841": 442918400.0,
+ "842": 442918400.0,
+ "843": 442918400.0,
+ "844": 442918400.0,
+ "845": 442918400.0,
+ "846": 442918400.0,
+ "847": 442918400.0,
+ "848": 442918400.0,
+ "849": 442918400.0,
+ "850": 442918400.0,
+ "851": 442918400.0,
+ "852": 442918400.0,
+ "853": 442918400.0,
+ "854": 442918400.0,
+ "855": 442918400.0,
+ "856": 442918400.0,
+ "857": 442918400.0,
+ "858": 442918400.0,
+ "859": 442918400.0,
+ "860": 442918400.0,
+ "861": 442918400.0,
+ "862": 442918400.0,
+ "863": 442918400.0,
+ "864": 442918400.0,
+ "865": 442918400.0,
+ "866": 442918400.0,
+ "867": 442918400.0,
+ "868": 442918400.0,
+ "869": 442918400.0,
+ "870": 442918400.0,
+ "871": 442918400.0,
+ "872": 442918400.0,
+ "873": 442918400.0,
+ "874": 442918400.0,
+ "875": 442918400.0,
+ "876": 442918400.0,
+ "877": 442918400.0,
+ "878": 442918400.0,
+ "879": 442918400.0,
+ "880": 442918400.0,
+ "881": 442918400.0,
+ "882": 442918400.0,
+ "883": 442918400.0,
+ "884": 442918400.0,
+ "885": 442918400.0,
+ "886": 442918400.0,
+ "887": 442918400.0,
+ "888": 442918400.0,
+ "889": 442918400.0,
+ "890": 442918400.0,
+ "891": 442918400.0,
+ "892": 442918400.0,
+ "893": 442918400.0,
+ "894": 442918400.0,
+ "895": 442918400.0,
+ "896": 442918400.0,
+ "897": 442918400.0,
+ "898": 442918400.0,
+ "899": 442918400.0,
+ "900": 442918400.0,
+ "901": 442918400.0,
+ "902": 442918400.0,
+ "903": 442918400.0,
+ "904": 442918400.0,
+ "905": 442918400.0,
+ "906": 442918400.0,
+ "907": 442918400.0,
+ "908": 442918400.0,
+ "909": 442918400.0,
+ "910": 442918400.0,
+ "911": 442918400.0,
+ "912": 442918400.0,
+ "913": 442918400.0,
+ "914": 442918400.0,
+ "915": 442918400.0,
+ "916": 442918400.0,
+ "917": 442918400.0,
+ "918": 442918400.0,
+ "919": 442918400.0,
+ "920": 442918400.0,
+ "921": 442918400.0,
+ "922": 442918400.0,
+ "923": 442918400.0,
+ "924": 442918400.0,
+ "925": 442918400.0,
+ "926": 442918400.0,
+ "927": 442918400.0,
+ "928": 442918400.0,
+ "929": 442918400.0,
+ "930": 442918400.0,
+ "931": 442918400.0,
+ "932": 442918400.0,
+ "933": 442918400.0,
+ "934": 442918400.0,
+ "935": 442918400.0,
+ "936": 442918400.0,
+ "937": 442918400.0,
+ "938": 442918400.0,
+ "939": 442918400.0,
+ "940": 442918400.0,
+ "941": 442918400.0,
+ "942": 442918400.0,
+ "943": 442918400.0,
+ "944": 442918400.0,
+ "945": 442918400.0,
+ "946": 442918400.0,
+ "947": 442918400.0,
+ "948": 442918400.0,
+ "949": 442918400.0,
+ "950": 442918400.0,
+ "951": 442918400.0,
+ "952": 442918400.0,
+ "953": 442918400.0,
+ "954": 442918400.0,
+ "955": 442918400.0,
+ "956": 442918400.0,
+ "957": 442918400.0,
+ "958": 442918400.0,
+ "959": 442918400.0,
+ "960": 442918400.0,
+ "961": 442918400.0,
+ "962": 442918400.0,
+ "963": 442918400.0,
+ "964": 442918400.0,
+ "965": 442918400.0,
+ "966": 442918400.0,
+ "967": 442918400.0,
+ "968": 442918400.0,
+ "969": 442918400.0,
+ "970": 442918400.0,
+ "971": 442918400.0,
+ "972": 442918400.0,
+ "973": 442918400.0,
+ "974": 442918400.0,
+ "975": 442918400.0,
+ "976": 442918400.0,
+ "977": 442918400.0,
+ "978": 442918400.0,
+ "979": 442918400.0,
+ "980": 442918400.0,
+ "981": 442918400.0,
+ "982": 442918400.0,
+ "983": 442918400.0,
+ "984": 442918400.0,
+ "985": 442918400.0,
+ "986": 442918400.0,
+ "987": 442918400.0,
+ "988": 442918400.0,
+ "989": 442918400.0,
+ "990": 442918400.0,
+ "991": 442918400.0,
+ "992": 442918400.0,
+ "993": 442918400.0,
+ "994": 442918400.0,
+ "995": 442918400.0,
+ "996": 442918400.0,
+ "997": 442918400.0,
+ "998": 442918400.0,
+ "999": 442918400.0,
+ "1000": 442918400.0,
+ "1001": 442918400.0,
+ "1002": 442918400.0,
+ "1003": 442918400.0,
+ "1004": 442918400.0,
+ "1005": 442918400.0,
+ "1006": 442918400.0,
+ "1007": 442918400.0,
+ "1008": 442918400.0,
+ "1009": 442918400.0,
+ "1010": 442918400.0,
+ "1011": 442918400.0,
+ "1012": 442918400.0,
+ "1013": 442918400.0,
+ "1014": 442918400.0,
+ "1015": 442918400.0,
+ "1016": 442918400.0,
+ "1017": 442918400.0,
+ "1018": 442918400.0,
+ "1019": 442918400.0,
+ "1020": 442918400.0,
+ "1021": 442918400.0,
+ "1022": 442918400.0,
+ "1023": 442918400.0,
+ "1024": 442918400.0,
+ "1025": 442918400.0,
+ "1026": 442918400.0,
+ "1027": 442918400.0,
+ "1028": 442918400.0,
+ "1029": 442918400.0,
+ "1030": 442918400.0,
+ "1031": 442918400.0,
+ "1032": 442918400.0,
+ "1033": 442918400.0,
+ "1034": 442918400.0,
+ "1035": 442918400.0,
+ "1036": 442918400.0,
+ "1037": 442918400.0,
+ "1038": 442918400.0,
+ "1039": 442918400.0,
+ "1040": 442918400.0,
+ "1041": 442918400.0,
+ "1042": 442918400.0,
+ "1043": 442918400.0,
+ "1044": 442918400.0,
+ "1045": 442918400.0,
+ "1046": 442918400.0,
+ "1047": 442918400.0,
+ "1048": 442918400.0,
+ "1049": 442918400.0,
+ "1050": 442918400.0,
+ "1051": 442918400.0,
+ "1052": 442918400.0,
+ "1053": 442918400.0,
+ "1054": 442918400.0,
+ "1055": 442918400.0,
+ "1056": 442918400.0,
+ "1057": 442918400.0,
+ "1058": 442918400.0,
+ "1059": 442918400.0,
+ "1060": 442918400.0,
+ "1061": 442918400.0,
+ "1062": 442918400.0,
+ "1063": 442918400.0,
+ "1064": 442918400.0,
+ "1065": 442918400.0,
+ "1066": 442918400.0,
+ "1067": 442918400.0,
+ "1068": 442918400.0,
+ "1069": 442918400.0,
+ "1070": 442918400.0,
+ "1071": 442918400.0,
+ "1072": 442918400.0,
+ "1073": 442918400.0,
+ "1074": 442918400.0,
+ "1075": 442918400.0,
+ "1076": 442918400.0,
+ "1077": 442918400.0,
+ "1078": 442918400.0,
+ "1079": 442918400.0,
+ "1080": 442918400.0,
+ "1081": 442918400.0,
+ "1082": 442918400.0,
+ "1083": 442918400.0,
+ "1084": 442918400.0,
+ "1085": 442918400.0,
+ "1086": 442918400.0,
+ "1087": 442918400.0,
+ "1088": 442918400.0,
+ "1089": 442918400.0,
+ "1090": 442918400.0,
+ "1091": 442918400.0,
+ "1092": 442918400.0,
+ "1093": 442918400.0,
+ "1094": 442918400.0,
+ "1095": 442918400.0,
+ "1096": 442918400.0,
+ "1097": 442918400.0,
+ "1098": 442918400.0,
+ "1099": 442918400.0,
+ "1100": 442918400.0,
+ "1101": 442918400.0,
+ "1102": 442918400.0,
+ "1103": 442918400.0,
+ "1104": 442918400.0,
+ "1105": 442918400.0,
+ "1106": 442918400.0,
+ "1107": 442918400.0,
+ "1108": 442918400.0,
+ "1109": 442918400.0,
+ "1110": 442918400.0,
+ "1111": 442918400.0,
+ "1112": 442918400.0,
+ "1113": 442918400.0,
+ "1114": 442918400.0,
+ "1115": 442918400.0,
+ "1116": 442918400.0,
+ "1117": 442918400.0,
+ "1118": 442918400.0,
+ "1119": 442918400.0,
+ "1120": 442918400.0,
+ "1121": 442918400.0,
+ "1122": 442918400.0,
+ "1123": 442918400.0,
+ "1124": 442918400.0,
+ "1125": 442918400.0,
+ "1126": 442918400.0,
+ "1127": 442918400.0,
+ "1128": 442918400.0,
+ "1129": 442918400.0,
+ "1130": 442918400.0,
+ "1131": 442918400.0,
+ "1132": 442918400.0,
+ "1133": 442918400.0,
+ "1134": 442918400.0,
+ "1135": 442918400.0,
+ "1136": 442918400.0,
+ "1137": 442918400.0,
+ "1138": 442918400.0,
+ "1139": 442918400.0,
+ "1140": 442918400.0,
+ "1141": 442918400.0,
+ "1142": 442918400.0,
+ "1143": 442918400.0,
+ "1144": 442918400.0,
+ "1145": 442918400.0,
+ "1146": 442918400.0,
+ "1147": 442918400.0,
+ "1148": 442918400.0,
+ "1149": 442918400.0,
+ "1150": 442918400.0,
+ "1151": 442918400.0,
+ "1152": 442918400.0,
+ "1153": 442918400.0,
+ "1154": 442918400.0,
+ "1155": 442918400.0,
+ "1156": 442918400.0,
+ "1157": 442918400.0,
+ "1158": 442918400.0,
+ "1159": 442918400.0,
+ "1160": 442918400.0,
+ "1161": 442918400.0,
+ "1162": 442918400.0,
+ "1163": 442918400.0,
+ "1164": 442918400.0,
+ "1165": 442918400.0,
+ "1166": 442918400.0,
+ "1167": 442918400.0,
+ "1168": 442918400.0,
+ "1169": 442918400.0,
+ "1170": 442918400.0,
+ "1171": 442918400.0,
+ "1172": 442918400.0,
+ "1173": 442918400.0,
+ "1174": 442918400.0,
+ "1175": 442918400.0,
+ "1176": 442918400.0,
+ "1177": 442918400.0,
+ "1178": 442918400.0,
+ "1179": 442918400.0,
+ "1180": 442918400.0,
+ "1181": 442918400.0,
+ "1182": 442918400.0,
+ "1183": 442918400.0,
+ "1184": 442918400.0,
+ "1185": 442918400.0,
+ "1186": 442918400.0,
+ "1187": 442918400.0,
+ "1188": 442918400.0,
+ "1189": 442918400.0,
+ "1190": 442918400.0,
+ "1191": 442918400.0,
+ "1192": 442918400.0,
+ "1193": 442918400.0,
+ "1194": 442918400.0,
+ "1195": 442918400.0,
+ "1196": 442918400.0,
+ "1197": 442918400.0,
+ "1198": 442918400.0,
+ "1199": 442918400.0,
+ "1200": 442918400.0,
+ "1201": 442918400.0,
+ "1202": 442918400.0,
+ "1203": 442918400.0,
+ "1204": 442918400.0,
+ "1205": 442918400.0,
+ "1206": 442918400.0,
+ "1207": 442918400.0,
+ "1208": 442918400.0,
+ "1209": 442918400.0,
+ "1210": 442918400.0,
+ "1211": 442918400.0,
+ "1212": 442918400.0,
+ "1213": 442918400.0,
+ "1214": 442918400.0,
+ "1215": 442918400.0,
+ "1216": 442918400.0,
+ "1217": 442918400.0,
+ "1218": 442918400.0,
+ "1219": 442918400.0,
+ "1220": 442918400.0,
+ "1221": 442918400.0,
+ "1222": 442918400.0,
+ "1223": 442918400.0,
+ "1224": 442918400.0,
+ "1225": 442918400.0,
+ "1226": 442918400.0,
+ "1227": 442918400.0,
+ "1228": 442918400.0,
+ "1229": 442918400.0,
+ "1230": 442918400.0,
+ "1231": 442918400.0,
+ "1232": 442918400.0,
+ "1233": 442918400.0,
+ "1234": 442918400.0,
+ "1235": 442918400.0,
+ "1236": 442918400.0,
+ "1237": 442918400.0,
+ "1238": 442918400.0,
+ "1239": 442918400.0,
+ "1240": 442918400.0,
+ "1241": 442918400.0,
+ "1242": 442918400.0,
+ "1243": 442918400.0,
+ "1244": 442918400.0,
+ "1245": 442918400.0,
+ "1246": 442918400.0,
+ "1247": 442918400.0,
+ "1248": 442918400.0,
+ "1249": 442918400.0,
+ "1250": 442918400.0,
+ "1251": 442918400.0,
+ "1252": 442918400.0,
+ "1253": 442918400.0,
+ "1254": 442918400.0,
+ "1255": 442918400.0,
+ "1256": 442918400.0,
+ "1257": 442918400.0,
+ "1258": 442918400.0,
+ "1259": 442918400.0,
+ "1260": 442918400.0,
+ "1261": 442918400.0,
+ "1262": 442918400.0,
+ "1263": 442918400.0,
+ "1264": 442918400.0,
+ "1265": 442918400.0,
+ "1266": 442918400.0,
+ "1267": 442918400.0,
+ "1268": 442918400.0,
+ "1269": 442918400.0,
+ "1270": 442918400.0,
+ "1271": 442918400.0,
+ "1272": 442918400.0,
+ "1273": 442918400.0,
+ "1274": 442918400.0,
+ "1275": 442918400.0,
+ "1276": 442918400.0,
+ "1277": 442918400.0,
+ "1278": 442918400.0,
+ "1279": 442918400.0,
+ "1280": 442918400.0,
+ "1281": 442918400.0,
+ "1282": 442918400.0,
+ "1283": 442918400.0,
+ "1284": 442918400.0,
+ "1285": 442918400.0,
+ "1286": 442918400.0,
+ "1287": 442918400.0,
+ "1288": 442918400.0,
+ "1289": 442918400.0,
+ "1290": 442918400.0,
+ "1291": 442918400.0,
+ "1292": 442918400.0,
+ "1293": 442918400.0,
+ "1294": 442918400.0,
+ "1295": 442918400.0,
+ "1296": 442918400.0,
+ "1297": 442918400.0,
+ "1298": 442918400.0,
+ "1299": 442918400.0,
+ "1300": 442918400.0,
+ "1301": 442918400.0,
+ "1302": 442918400.0,
+ "1303": 442918400.0,
+ "1304": 442918400.0,
+ "1305": 442918400.0,
+ "1306": 442918400.0,
+ "1307": 442918400.0,
+ "1308": 442918400.0,
+ "1309": 442918400.0,
+ "1310": 442918400.0,
+ "1311": 442918400.0,
+ "1312": 442918400.0,
+ "1313": 442918400.0,
+ "1314": 442918400.0,
+ "1315": 442918400.0,
+ "1316": 442918400.0,
+ "1317": 442918400.0,
+ "1318": 442918400.0,
+ "1319": 442918400.0,
+ "1320": 442918400.0,
+ "1321": 442918400.0,
+ "1322": 442918400.0,
+ "1323": 442918400.0,
+ "1324": 442918400.0,
+ "1325": 442918400.0,
+ "1326": 442918400.0,
+ "1327": 442918400.0,
+ "1328": 442918400.0,
+ "1329": 442918400.0,
+ "1330": 442918400.0,
+ "1331": 442918400.0,
+ "1332": 442918400.0,
+ "1333": 442918400.0,
+ "1334": 442918400.0,
+ "1335": 442918400.0,
+ "1336": 442918400.0,
+ "1337": 442918400.0,
+ "1338": 442918400.0,
+ "1339": 442918400.0,
+ "1340": 442918400.0,
+ "1341": 442918400.0,
+ "1342": 442918400.0,
+ "1343": 442918400.0,
+ "1344": 442918400.0,
+ "1345": 442918400.0,
+ "1346": 442918400.0,
+ "1347": 442918400.0,
+ "1348": 442918400.0,
+ "1349": 442918400.0,
+ "1350": 442918400.0,
+ "1351": 442918400.0,
+ "1352": 442918400.0,
+ "1353": 442918400.0,
+ "1354": 442918400.0,
+ "1355": 442918400.0,
+ "1356": 442918400.0,
+ "1357": 442918400.0,
+ "1358": 442918400.0,
+ "1359": 442918400.0,
+ "1360": 442918400.0,
+ "1361": 442918400.0,
+ "1362": 442918400.0,
+ "1363": 442918400.0,
+ "1364": 442918400.0,
+ "1365": 442918400.0,
+ "1366": 442918400.0,
+ "1367": 442918400.0,
+ "1368": 442918400.0,
+ "1369": 442918400.0,
+ "1370": 442918400.0,
+ "1371": 442918400.0,
+ "1372": 442918400.0,
+ "1373": 442918400.0,
+ "1374": 442918400.0,
+ "1375": 442918400.0,
+ "1376": 442918400.0,
+ "1377": 442918400.0,
+ "1378": 442918400.0,
+ "1379": 442918400.0,
+ "1380": 442918400.0,
+ "1381": 442918400.0,
+ "1382": 442918400.0,
+ "1383": 442918400.0,
+ "1384": 442918400.0,
+ "1385": 442918400.0,
+ "1386": 442918400.0,
+ "1387": 442918400.0,
+ "1388": 442918400.0,
+ "1389": 442918400.0,
+ "1390": 442918400.0,
+ "1391": 442918400.0,
+ "1392": 442918400.0,
+ "1393": 442918400.0,
+ "1394": 442918400.0,
+ "1395": 442918400.0,
+ "1396": 442918400.0,
+ "1397": 442918400.0,
+ "1398": 442918400.0,
+ "1399": 442918400.0,
+ "1400": 442918400.0,
+ "1401": 442918400.0,
+ "1402": 442918400.0,
+ "1403": 442918400.0,
+ "1404": 442918400.0,
+ "1405": 442918400.0,
+ "1406": 442918400.0,
+ "1407": 442918400.0,
+ "1408": 442918400.0,
+ "1409": 442918400.0,
+ "1410": 442918400.0,
+ "1411": 442918400.0,
+ "1412": 442918400.0,
+ "1413": 442918400.0,
+ "1414": 442918400.0,
+ "1415": 442918400.0,
+ "1416": 442918400.0,
+ "1417": 442918400.0,
+ "1418": 442918400.0,
+ "1419": 442918400.0,
+ "1420": 442918400.0,
+ "1421": 442918400.0,
+ "1422": 442918400.0,
+ "1423": 442918400.0,
+ "1424": 442918400.0,
+ "1425": 442918400.0,
+ "1426": 442918400.0,
+ "1427": 442918400.0,
+ "1428": 442918400.0,
+ "1429": 442918400.0,
+ "1430": 442918400.0,
+ "1431": 442918400.0,
+ "1432": 442918400.0,
+ "1433": 442918400.0,
+ "1434": 442918400.0,
+ "1435": 442918400.0,
+ "1436": 442918400.0,
+ "1437": 442918400.0,
+ "1438": 442918400.0,
+ "1439": 442918400.0,
+ "1440": 442918400.0,
+ "1441": 442918400.0,
+ "1442": 442918400.0,
+ "1443": 442918400.0,
+ "1444": 442918400.0,
+ "1445": 442918400.0,
+ "1446": 442918400.0,
+ "1447": 442918400.0,
+ "1448": 442918400.0,
+ "1449": 442918400.0,
+ "1450": 442918400.0,
+ "1451": 442918400.0,
+ "1452": 442918400.0,
+ "1453": 442918400.0,
+ "1454": 442918400.0,
+ "1455": 442918400.0,
+ "1456": 442918400.0,
+ "1457": 442918400.0,
+ "1458": 442918400.0,
+ "1459": 442918400.0,
+ "1460": 442918400.0,
+ "1461": 442918400.0,
+ "1462": 442918400.0,
+ "1463": 442918400.0,
+ "1464": 442918400.0,
+ "1465": 442918400.0,
+ "1466": 442918400.0,
+ "1467": 442918400.0,
+ "1468": 442918400.0,
+ "1469": 442918400.0,
+ "1470": 442918400.0,
+ "1471": 442918400.0,
+ "1472": 442918400.0,
+ "1473": 442918400.0,
+ "1474": 442918400.0,
+ "1475": 442918400.0,
+ "1476": 442918400.0,
+ "1477": 442918400.0,
+ "1478": 442918400.0,
+ "1479": 442918400.0,
+ "1480": 442918400.0,
+ "1481": 442918400.0,
+ "1482": 442918400.0,
+ "1483": 442918400.0,
+ "1484": 442918400.0,
+ "1485": 442918400.0,
+ "1486": 442918400.0,
+ "1487": 442918400.0,
+ "1488": 442918400.0,
+ "1489": 442918400.0,
+ "1490": 442918400.0,
+ "1491": 442918400.0,
+ "1492": 442918400.0,
+ "1493": 442918400.0,
+ "1494": 442918400.0,
+ "1495": 442918400.0,
+ "1496": 442918400.0,
+ "1497": 442918400.0,
+ "1498": 442918400.0,
+ "1499": 442918400.0,
+ "1500": 442918400.0,
+ "1501": 442918400.0,
+ "1502": 442918400.0,
+ "1503": 442918400.0,
+ "1504": 442918400.0,
+ "1505": 442918400.0,
+ "1506": 442918400.0,
+ "1507": 442918400.0,
+ "1508": 442918400.0,
+ "1509": 442918400.0,
+ "1510": 442918400.0,
+ "1511": 442918400.0,
+ "1512": 442918400.0,
+ "1513": 442918400.0,
+ "1514": 442918400.0,
+ "1515": 442918400.0,
+ "1516": 442918400.0,
+ "1517": 442918400.0,
+ "1518": 442918400.0,
+ "1519": 442918400.0,
+ "1520": 442918400.0,
+ "1521": 442918400.0,
+ "1522": 442918400.0,
+ "1523": 442918400.0,
+ "1524": 442918400.0,
+ "1525": 442918400.0,
+ "1526": 442918400.0,
+ "1527": 442918400.0,
+ "1528": 442918400.0,
+ "1529": 442918400.0,
+ "1530": 442918400.0,
+ "1531": 442918400.0,
+ "1532": 442918400.0,
+ "1533": 442918400.0,
+ "1534": 442918400.0,
+ "1535": 442918400.0,
+ "1536": 442918400.0,
+ "1537": 442918400.0,
+ "1538": 442918400.0,
+ "1539": 442918400.0,
+ "1540": 442918400.0,
+ "1541": 442918400.0,
+ "1542": 442918400.0,
+ "1543": 442918400.0,
+ "1544": 442918400.0,
+ "1545": 442918400.0,
+ "1546": 442918400.0,
+ "1547": 442918400.0,
+ "1548": 442918400.0,
+ "1549": 442918400.0,
+ "1550": 442918400.0,
+ "1551": 442918400.0,
+ "1552": 442918400.0,
+ "1553": 442918400.0,
+ "1554": 442918400.0,
+ "1555": 442918400.0,
+ "1556": 442918400.0,
+ "1557": 442918400.0,
+ "1558": 442918400.0,
+ "1559": 442918400.0,
+ "1560": 442918400.0,
+ "1561": 442918400.0,
+ "1562": 442918400.0,
+ "1563": 442918400.0,
+ "1564": 442918400.0,
+ "1565": 442918400.0,
+ "1566": 442918400.0,
+ "1567": 442918400.0,
+ "1568": 442918400.0,
+ "1569": 442918400.0,
+ "1570": 442918400.0,
+ "1571": 442918400.0,
+ "1572": 442918400.0,
+ "1573": 442918400.0,
+ "1574": 442918400.0,
+ "1575": 442918400.0,
+ "1576": 442918400.0,
+ "1577": 442918400.0,
+ "1578": 442918400.0,
+ "1579": 442918400.0,
+ "1580": 442918400.0,
+ "1581": 442918400.0,
+ "1582": 442918400.0,
+ "1583": 442918400.0,
+ "1584": 442918400.0,
+ "1585": 442918400.0,
+ "1586": 442918400.0,
+ "1587": 442918400.0,
+ "1588": 442918400.0,
+ "1589": 442918400.0,
+ "1590": 442918400.0,
+ "1591": 442918400.0,
+ "1592": 442918400.0,
+ "1593": 442918400.0,
+ "1594": 442918400.0,
+ "1595": 442918400.0,
+ "1596": 442918400.0,
+ "1597": 442918400.0,
+ "1598": 442918400.0,
+ "1599": 442918400.0,
+ "1600": 442918400.0,
+ "1601": 442918400.0,
+ "1602": 442918400.0,
+ "1603": 442918400.0,
+ "1604": 442918400.0,
+ "1605": 442918400.0,
+ "1606": 442918400.0,
+ "1607": 442918400.0,
+ "1608": 442918400.0,
+ "1609": 442918400.0,
+ "1610": 442918400.0,
+ "1611": 442918400.0,
+ "1612": 442918400.0,
+ "1613": 442918400.0,
+ "1614": 442918400.0,
+ "1615": 442918400.0,
+ "1616": 442918400.0,
+ "1617": 442918400.0,
+ "1618": 442918400.0,
+ "1619": 442918400.0,
+ "1620": 442918400.0,
+ "1621": 442918400.0,
+ "1622": 442918400.0,
+ "1623": 442918400.0,
+ "1624": 442918400.0,
+ "1625": 442918400.0,
+ "1626": 442918400.0,
+ "1627": 442918400.0,
+ "1628": 442918400.0,
+ "1629": 442918400.0,
+ "1630": 442918400.0,
+ "1631": 442918400.0,
+ "1632": 442918400.0,
+ "1633": 442918400.0,
+ "1634": 442918400.0,
+ "1635": 442918400.0,
+ "1636": 442918400.0,
+ "1637": 442918400.0,
+ "1638": 442918400.0,
+ "1639": 442918400.0,
+ "1640": 442918400.0,
+ "1641": 442918400.0,
+ "1642": 442918400.0,
+ "1643": 442918400.0,
+ "1644": 442918400.0,
+ "1645": 442918400.0,
+ "1646": 442918400.0,
+ "1647": 442918400.0,
+ "1648": 442918400.0,
+ "1649": 442918400.0,
+ "1650": 442918400.0,
+ "1651": 442918400.0,
+ "1652": 442918400.0,
+ "1653": 442918400.0,
+ "1654": 442918400.0,
+ "1655": 442918400.0,
+ "1656": 442918400.0,
+ "1657": 442918400.0,
+ "1658": 442918400.0,
+ "1659": 442918400.0,
+ "1660": 442918400.0,
+ "1661": 442918400.0,
+ "1662": 442918400.0,
+ "1663": 442918400.0,
+ "1664": 442918400.0,
+ "1665": 442918400.0,
+ "1666": 442918400.0,
+ "1667": 442918400.0,
+ "1668": 442918400.0,
+ "1669": 442918400.0,
+ "1670": 442918400.0,
+ "1671": 442918400.0,
+ "1672": 442918400.0,
+ "1673": 442918400.0,
+ "1674": 442918400.0,
+ "1675": 442918400.0,
+ "1676": 442918400.0,
+ "1677": 442918400.0,
+ "1678": 442918400.0,
+ "1679": 442918400.0,
+ "1680": 442918400.0,
+ "1681": 442918400.0,
+ "1682": 442918400.0,
+ "1683": 442918400.0,
+ "1684": 442918400.0,
+ "1685": 442918400.0,
+ "1686": 442918400.0,
+ "1687": 442918400.0,
+ "1688": 442918400.0,
+ "1689": 442918400.0,
+ "1690": 442918400.0,
+ "1691": 442918400.0,
+ "1692": 442918400.0,
+ "1693": 442918400.0,
+ "1694": 442918400.0,
+ "1695": 442918400.0,
+ "1696": 442918400.0,
+ "1697": 442918400.0,
+ "1698": 442918400.0,
+ "1699": 442918400.0,
+ "1700": 442918400.0,
+ "1701": 442918400.0,
+ "1702": 442918400.0,
+ "1703": 442918400.0,
+ "1704": 442918400.0,
+ "1705": 442918400.0,
+ "1706": 442918400.0,
+ "1707": 442918400.0,
+ "1708": 442918400.0,
+ "1709": 442918400.0,
+ "1710": 442918400.0,
+ "1711": 442918400.0,
+ "1712": 442918400.0,
+ "1713": 442918400.0,
+ "1714": 442918400.0,
+ "1715": 442918400.0,
+ "1716": 442918400.0,
+ "1717": 442918400.0,
+ "1718": 442918400.0,
+ "1719": 442918400.0,
+ "1720": 442918400.0,
+ "1721": 442918400.0,
+ "1722": 442918400.0,
+ "1723": 442918400.0,
+ "1724": 442918400.0,
+ "1725": 442918400.0,
+ "1726": 442918400.0,
+ "1727": 442918400.0,
+ "1728": 442918400.0,
+ "1729": 442918400.0,
+ "1730": 442918400.0,
+ "1731": 442918400.0,
+ "1732": 442918400.0,
+ "1733": 442918400.0,
+ "1734": 442918400.0,
+ "1735": 442918400.0,
+ "1736": 442918400.0,
+ "1737": 442918400.0,
+ "1738": 442918400.0,
+ "1739": 442918400.0,
+ "1740": 442918400.0,
+ "1741": 442918400.0,
+ "1742": 442918400.0,
+ "1743": 442918400.0,
+ "1744": 442918400.0,
+ "1745": 442918400.0,
+ "1746": 442918400.0,
+ "1747": 442918400.0,
+ "1748": 442918400.0,
+ "1749": 442918400.0,
+ "1750": 442918400.0,
+ "1751": 442918400.0,
+ "1752": 442918400.0,
+ "1753": 442918400.0,
+ "1754": 442918400.0,
+ "1755": 442918400.0,
+ "1756": 442918400.0,
+ "1757": 442918400.0,
+ "1758": 442918400.0,
+ "1759": 442918400.0,
+ "1760": 442918400.0,
+ "1761": 442918400.0,
+ "1762": 442918400.0,
+ "1763": 442918400.0,
+ "1764": 442918400.0,
+ "1765": 442918400.0,
+ "1766": 442918400.0,
+ "1767": 442918400.0,
+ "1768": 442918400.0,
+ "1769": 442918400.0,
+ "1770": 442918400.0,
+ "1771": 442918400.0,
+ "1772": 442918400.0,
+ "1773": 442918400.0,
+ "1774": 442918400.0,
+ "1775": 442918400.0,
+ "1776": 442918400.0,
+ "1777": 442918400.0,
+ "1778": 442918400.0,
+ "1779": 442918400.0,
+ "1780": 442918400.0,
+ "1781": 442918400.0,
+ "1782": 442918400.0,
+ "1783": 442918400.0,
+ "1784": 442918400.0,
+ "1785": 442918400.0,
+ "1786": 442918400.0,
+ "1787": 442918400.0,
+ "1788": 442918400.0,
+ "1789": 442918400.0,
+ "1790": 442918400.0,
+ "1791": 442918400.0,
+ "1792": 442918400.0,
+ "1793": 442918400.0,
+ "1794": 442918400.0,
+ "1795": 442918400.0,
+ "1796": 442918400.0,
+ "1797": 442918400.0,
+ "1798": 442918400.0,
+ "1799": 442918400.0,
+ "1800": 442918400.0,
+ "1801": 442918400.0,
+ "1802": 442918400.0,
+ "1803": 442918400.0,
+ "1804": 442918400.0,
+ "1805": 442918400.0,
+ "1806": 442918400.0,
+ "1807": 442918400.0,
+ "1808": 442918400.0,
+ "1809": 442918400.0,
+ "1810": 442918400.0,
+ "1811": 442918400.0,
+ "1812": 442918400.0,
+ "1813": 442918400.0,
+ "1814": 442918400.0,
+ "1815": 442918400.0,
+ "1816": 442918400.0,
+ "1817": 442918400.0,
+ "1818": 442918400.0,
+ "1819": 442918400.0,
+ "1820": 442918400.0,
+ "1821": 442918400.0,
+ "1822": 442918400.0,
+ "1823": 442918400.0,
+ "1824": 442918400.0,
+ "1825": 442918400.0,
+ "1826": 442918400.0,
+ "1827": 442918400.0,
+ "1828": 442918400.0,
+ "1829": 442918400.0,
+ "1830": 442918400.0,
+ "1831": 442918400.0,
+ "1832": 442918400.0,
+ "1833": 442918400.0,
+ "1834": 442918400.0,
+ "1835": 442918400.0,
+ "1836": 442918400.0,
+ "1837": 442918400.0,
+ "1838": 442918400.0,
+ "1839": 442918400.0,
+ "1840": 442918400.0,
+ "1841": 442918400.0,
+ "1842": 442918400.0,
+ "1843": 442918400.0,
+ "1844": 442918400.0,
+ "1845": 442918400.0,
+ "1846": 442918400.0,
+ "1847": 442918400.0,
+ "1848": 442918400.0,
+ "1849": 442918400.0,
+ "1850": 442918400.0,
+ "1851": 442918400.0,
+ "1852": 442918400.0,
+ "1853": 442918400.0,
+ "1854": 442918400.0,
+ "1855": 442918400.0,
+ "1856": 442918400.0,
+ "1857": 442918400.0,
+ "1858": 442918400.0,
+ "1859": 442918400.0,
+ "1860": 442918400.0,
+ "1861": 442918400.0,
+ "1862": 442918400.0,
+ "1863": 442918400.0,
+ "1864": 442918400.0,
+ "1865": 442918400.0,
+ "1866": 442918400.0,
+ "1867": 442918400.0,
+ "1868": 442918400.0,
+ "1869": 442918400.0,
+ "1870": 442918400.0,
+ "1871": 442918400.0,
+ "1872": 442918400.0,
+ "1873": 442918400.0,
+ "1874": 442918400.0,
+ "1875": 442918400.0,
+ "1876": 442918400.0,
+ "1877": 442918400.0,
+ "1878": 442918400.0,
+ "1879": 442918400.0,
+ "1880": 442918400.0,
+ "1881": 442918400.0,
+ "1882": 442918400.0,
+ "1883": 442918400.0,
+ "1884": 442918400.0,
+ "1885": 442918400.0,
+ "1886": 442918400.0,
+ "1887": 442918400.0,
+ "1888": 442918400.0,
+ "1889": 442918400.0,
+ "1890": 442918400.0,
+ "1891": 442918400.0,
+ "1892": 442918400.0,
+ "1893": 442918400.0,
+ "1894": 442918400.0,
+ "1895": 442918400.0,
+ "1896": 442918400.0,
+ "1897": 442918400.0,
+ "1898": 442918400.0,
+ "1899": 442918400.0,
+ "1900": 442918400.0,
+ "1901": 442918400.0,
+ "1902": 442918400.0,
+ "1903": 442918400.0,
+ "1904": 442918400.0,
+ "1905": 442918400.0,
+ "1906": 442918400.0,
+ "1907": 442918400.0,
+ "1908": 442918400.0,
+ "1909": 442918400.0,
+ "1910": 442918400.0,
+ "1911": 442918400.0,
+ "1912": 442918400.0,
+ "1913": 442918400.0,
+ "1914": 442918400.0,
+ "1915": 442918400.0,
+ "1916": 442918400.0,
+ "1917": 442918400.0,
+ "1918": 442918400.0,
+ "1919": 442918400.0,
+ "1920": 442918400.0,
+ "1921": 442918400.0,
+ "1922": 442918400.0,
+ "1923": 442918400.0,
+ "1924": 442918400.0,
+ "1925": 442918400.0,
+ "1926": 442918400.0,
+ "1927": 442918400.0,
+ "1928": 442918400.0,
+ "1929": 442918400.0,
+ "1930": 442918400.0,
+ "1931": 442918400.0,
+ "1932": 442918400.0,
+ "1933": 442918400.0,
+ "1934": 442918400.0,
+ "1935": 442918400.0,
+ "1936": 442918400.0,
+ "1937": 442918400.0,
+ "1938": 442918400.0,
+ "1939": 442918400.0,
+ "1940": 442918400.0,
+ "1941": 442918400.0,
+ "1942": 442918400.0,
+ "1943": 442918400.0,
+ "1944": 442918400.0,
+ "1945": 442918400.0,
+ "1946": 442918400.0,
+ "1947": 442918400.0,
+ "1948": 442918400.0,
+ "1949": 442918400.0,
+ "1950": 442918400.0,
+ "1951": 442918400.0,
+ "1952": 442918400.0,
+ "1953": 442918400.0,
+ "1954": 442918400.0,
+ "1955": 442918400.0,
+ "1956": 442918400.0,
+ "1957": 442918400.0,
+ "1958": 442918400.0,
+ "1959": 442918400.0,
+ "1960": 442918400.0,
+ "1961": 442918400.0,
+ "1962": 442918400.0,
+ "1963": 442918400.0,
+ "1964": 442918400.0,
+ "1965": 442918400.0,
+ "1966": 442918400.0,
+ "1967": 442918400.0,
+ "1968": 442918400.0,
+ "1969": 442918400.0,
+ "1970": 442918400.0,
+ "1971": 442918400.0,
+ "1972": 442918400.0,
+ "1973": 442918400.0,
+ "1974": 442918400.0,
+ "1975": 442918400.0,
+ "1976": 442918400.0,
+ "1977": 442918400.0,
+ "1978": 442918400.0,
+ "1979": 442918400.0,
+ "1980": 442918400.0,
+ "1981": 442918400.0,
+ "1982": 442918400.0,
+ "1983": 442918400.0,
+ "1984": 442918400.0,
+ "1985": 442918400.0,
+ "1986": 442918400.0,
+ "1987": 442918400.0,
+ "1988": 442918400.0,
+ "1989": 442918400.0,
+ "1990": 442918400.0,
+ "1991": 442918400.0,
+ "1992": 442918400.0,
+ "1993": 442918400.0,
+ "1994": 442918400.0,
+ "1995": 442918400.0,
+ "1996": 442918400.0,
+ "1997": 442918400.0,
+ "1998": 442918400.0,
+ "1999": 442918400.0,
+ "2000": 442918400.0
+ }
+ },
+ "mem-max-allocated-bytes": {
+ "start_step": 1,
+ "end_step": 2000,
+ "step_interval": 1,
+ "values": {
+ "1": 761183744.0,
+ "2": 849621504.0,
+ "3": 849621504.0,
+ "4": 849621504.0,
+ "5": 849621504.0,
+ "6": 849621504.0,
+ "7": 849621504.0,
+ "8": 849621504.0,
+ "9": 849621504.0,
+ "10": 849621504.0,
+ "11": 849621504.0,
+ "12": 849621504.0,
+ "13": 849621504.0,
+ "14": 849621504.0,
+ "15": 849621504.0,
+ "16": 849621504.0,
+ "17": 849621504.0,
+ "18": 849621504.0,
+ "19": 849621504.0,
+ "20": 849621504.0,
+ "21": 849621504.0,
+ "22": 849621504.0,
+ "23": 849621504.0,
+ "24": 849621504.0,
+ "25": 849621504.0,
+ "26": 849621504.0,
+ "27": 849621504.0,
+ "28": 849621504.0,
+ "29": 849621504.0,
+ "30": 849621504.0,
+ "31": 849621504.0,
+ "32": 849621504.0,
+ "33": 849621504.0,
+ "34": 849621504.0,
+ "35": 849621504.0,
+ "36": 849621504.0,
+ "37": 849621504.0,
+ "38": 849621504.0,
+ "39": 849621504.0,
+ "40": 849621504.0,
+ "41": 849621504.0,
+ "42": 849621504.0,
+ "43": 849621504.0,
+ "44": 849621504.0,
+ "45": 849621504.0,
+ "46": 849621504.0,
+ "47": 849621504.0,
+ "48": 849621504.0,
+ "49": 849621504.0,
+ "50": 849621504.0,
+ "51": 849621504.0,
+ "52": 849621504.0,
+ "53": 849621504.0,
+ "54": 849621504.0,
+ "55": 849621504.0,
+ "56": 849621504.0,
+ "57": 849621504.0,
+ "58": 849621504.0,
+ "59": 849621504.0,
+ "60": 849621504.0,
+ "61": 849621504.0,
+ "62": 849621504.0,
+ "63": 849621504.0,
+ "64": 849621504.0,
+ "65": 849621504.0,
+ "66": 849621504.0,
+ "67": 849621504.0,
+ "68": 849621504.0,
+ "69": 849621504.0,
+ "70": 849621504.0,
+ "71": 849621504.0,
+ "72": 849621504.0,
+ "73": 849621504.0,
+ "74": 849621504.0,
+ "75": 849621504.0,
+ "76": 849621504.0,
+ "77": 849621504.0,
+ "78": 849621504.0,
+ "79": 849621504.0,
+ "80": 849621504.0,
+ "81": 849621504.0,
+ "82": 849621504.0,
+ "83": 849621504.0,
+ "84": 849621504.0,
+ "85": 849621504.0,
+ "86": 849621504.0,
+ "87": 849621504.0,
+ "88": 849621504.0,
+ "89": 849621504.0,
+ "90": 849621504.0,
+ "91": 849621504.0,
+ "92": 849621504.0,
+ "93": 849621504.0,
+ "94": 849621504.0,
+ "95": 849621504.0,
+ "96": 849621504.0,
+ "97": 849621504.0,
+ "98": 849621504.0,
+ "99": 849621504.0,
+ "100": 849621504.0,
+ "101": 849621504.0,
+ "102": 849621504.0,
+ "103": 849621504.0,
+ "104": 849621504.0,
+ "105": 849621504.0,
+ "106": 849621504.0,
+ "107": 849621504.0,
+ "108": 849621504.0,
+ "109": 849621504.0,
+ "110": 849621504.0,
+ "111": 849621504.0,
+ "112": 849621504.0,
+ "113": 849621504.0,
+ "114": 849621504.0,
+ "115": 849621504.0,
+ "116": 849621504.0,
+ "117": 849621504.0,
+ "118": 849621504.0,
+ "119": 849621504.0,
+ "120": 849621504.0,
+ "121": 849621504.0,
+ "122": 849621504.0,
+ "123": 849621504.0,
+ "124": 849621504.0,
+ "125": 849621504.0,
+ "126": 849621504.0,
+ "127": 849621504.0,
+ "128": 849621504.0,
+ "129": 849621504.0,
+ "130": 849621504.0,
+ "131": 849621504.0,
+ "132": 849621504.0,
+ "133": 849621504.0,
+ "134": 849621504.0,
+ "135": 849621504.0,
+ "136": 849621504.0,
+ "137": 849621504.0,
+ "138": 849621504.0,
+ "139": 849621504.0,
+ "140": 849621504.0,
+ "141": 849621504.0,
+ "142": 849621504.0,
+ "143": 849621504.0,
+ "144": 849621504.0,
+ "145": 849621504.0,
+ "146": 849621504.0,
+ "147": 849621504.0,
+ "148": 849621504.0,
+ "149": 849621504.0,
+ "150": 849621504.0,
+ "151": 849621504.0,
+ "152": 849621504.0,
+ "153": 849621504.0,
+ "154": 849621504.0,
+ "155": 849621504.0,
+ "156": 849621504.0,
+ "157": 849621504.0,
+ "158": 849621504.0,
+ "159": 849621504.0,
+ "160": 849621504.0,
+ "161": 849621504.0,
+ "162": 849621504.0,
+ "163": 849621504.0,
+ "164": 849621504.0,
+ "165": 849621504.0,
+ "166": 849621504.0,
+ "167": 849621504.0,
+ "168": 849621504.0,
+ "169": 849621504.0,
+ "170": 849621504.0,
+ "171": 849621504.0,
+ "172": 849621504.0,
+ "173": 849621504.0,
+ "174": 849621504.0,
+ "175": 849621504.0,
+ "176": 849621504.0,
+ "177": 849621504.0,
+ "178": 849621504.0,
+ "179": 849621504.0,
+ "180": 849621504.0,
+ "181": 849621504.0,
+ "182": 849621504.0,
+ "183": 849621504.0,
+ "184": 849621504.0,
+ "185": 849621504.0,
+ "186": 849621504.0,
+ "187": 849621504.0,
+ "188": 849621504.0,
+ "189": 849621504.0,
+ "190": 849621504.0,
+ "191": 849621504.0,
+ "192": 849621504.0,
+ "193": 849621504.0,
+ "194": 849621504.0,
+ "195": 849621504.0,
+ "196": 849621504.0,
+ "197": 849621504.0,
+ "198": 849621504.0,
+ "199": 849621504.0,
+ "200": 849621504.0,
+ "201": 849621504.0,
+ "202": 849621504.0,
+ "203": 849621504.0,
+ "204": 849621504.0,
+ "205": 849621504.0,
+ "206": 849621504.0,
+ "207": 849621504.0,
+ "208": 849621504.0,
+ "209": 849621504.0,
+ "210": 849621504.0,
+ "211": 849621504.0,
+ "212": 849621504.0,
+ "213": 849621504.0,
+ "214": 849621504.0,
+ "215": 849621504.0,
+ "216": 849621504.0,
+ "217": 849621504.0,
+ "218": 849621504.0,
+ "219": 849621504.0,
+ "220": 849621504.0,
+ "221": 849621504.0,
+ "222": 849621504.0,
+ "223": 849621504.0,
+ "224": 849621504.0,
+ "225": 849621504.0,
+ "226": 849621504.0,
+ "227": 849621504.0,
+ "228": 849621504.0,
+ "229": 849621504.0,
+ "230": 849621504.0,
+ "231": 849621504.0,
+ "232": 849621504.0,
+ "233": 849621504.0,
+ "234": 849621504.0,
+ "235": 849621504.0,
+ "236": 849621504.0,
+ "237": 849621504.0,
+ "238": 849621504.0,
+ "239": 849621504.0,
+ "240": 849621504.0,
+ "241": 849621504.0,
+ "242": 849621504.0,
+ "243": 849621504.0,
+ "244": 849621504.0,
+ "245": 849621504.0,
+ "246": 849621504.0,
+ "247": 849621504.0,
+ "248": 849621504.0,
+ "249": 849621504.0,
+ "250": 849621504.0,
+ "251": 849621504.0,
+ "252": 849621504.0,
+ "253": 849621504.0,
+ "254": 849621504.0,
+ "255": 849621504.0,
+ "256": 849621504.0,
+ "257": 849621504.0,
+ "258": 849621504.0,
+ "259": 849621504.0,
+ "260": 849621504.0,
+ "261": 849621504.0,
+ "262": 849621504.0,
+ "263": 849621504.0,
+ "264": 849621504.0,
+ "265": 849621504.0,
+ "266": 849621504.0,
+ "267": 849621504.0,
+ "268": 849621504.0,
+ "269": 849621504.0,
+ "270": 849621504.0,
+ "271": 849621504.0,
+ "272": 849621504.0,
+ "273": 849621504.0,
+ "274": 849621504.0,
+ "275": 849621504.0,
+ "276": 849621504.0,
+ "277": 849621504.0,
+ "278": 849621504.0,
+ "279": 849621504.0,
+ "280": 849621504.0,
+ "281": 849621504.0,
+ "282": 849621504.0,
+ "283": 849621504.0,
+ "284": 849621504.0,
+ "285": 849621504.0,
+ "286": 849621504.0,
+ "287": 849621504.0,
+ "288": 849621504.0,
+ "289": 849621504.0,
+ "290": 849621504.0,
+ "291": 849621504.0,
+ "292": 849621504.0,
+ "293": 849621504.0,
+ "294": 849621504.0,
+ "295": 849621504.0,
+ "296": 849621504.0,
+ "297": 849621504.0,
+ "298": 849621504.0,
+ "299": 849621504.0,
+ "300": 849621504.0,
+ "301": 849621504.0,
+ "302": 849621504.0,
+ "303": 849621504.0,
+ "304": 849621504.0,
+ "305": 849621504.0,
+ "306": 849621504.0,
+ "307": 849621504.0,
+ "308": 849621504.0,
+ "309": 849621504.0,
+ "310": 849621504.0,
+ "311": 849621504.0,
+ "312": 849621504.0,
+ "313": 849621504.0,
+ "314": 849621504.0,
+ "315": 849621504.0,
+ "316": 849621504.0,
+ "317": 849621504.0,
+ "318": 849621504.0,
+ "319": 849621504.0,
+ "320": 849621504.0,
+ "321": 849621504.0,
+ "322": 849621504.0,
+ "323": 849621504.0,
+ "324": 849621504.0,
+ "325": 849621504.0,
+ "326": 849621504.0,
+ "327": 849621504.0,
+ "328": 849621504.0,
+ "329": 849621504.0,
+ "330": 849621504.0,
+ "331": 849621504.0,
+ "332": 849621504.0,
+ "333": 849621504.0,
+ "334": 849621504.0,
+ "335": 849621504.0,
+ "336": 849621504.0,
+ "337": 849621504.0,
+ "338": 849621504.0,
+ "339": 849621504.0,
+ "340": 849621504.0,
+ "341": 849621504.0,
+ "342": 849621504.0,
+ "343": 849621504.0,
+ "344": 849621504.0,
+ "345": 849621504.0,
+ "346": 849621504.0,
+ "347": 849621504.0,
+ "348": 849621504.0,
+ "349": 849621504.0,
+ "350": 849621504.0,
+ "351": 849621504.0,
+ "352": 849621504.0,
+ "353": 849621504.0,
+ "354": 849621504.0,
+ "355": 849621504.0,
+ "356": 849621504.0,
+ "357": 849621504.0,
+ "358": 849621504.0,
+ "359": 849621504.0,
+ "360": 849621504.0,
+ "361": 849621504.0,
+ "362": 849621504.0,
+ "363": 849621504.0,
+ "364": 849621504.0,
+ "365": 849621504.0,
+ "366": 849621504.0,
+ "367": 849621504.0,
+ "368": 849621504.0,
+ "369": 849621504.0,
+ "370": 849621504.0,
+ "371": 849621504.0,
+ "372": 849621504.0,
+ "373": 849621504.0,
+ "374": 849621504.0,
+ "375": 849621504.0,
+ "376": 849621504.0,
+ "377": 849621504.0,
+ "378": 849621504.0,
+ "379": 849621504.0,
+ "380": 849621504.0,
+ "381": 849621504.0,
+ "382": 849621504.0,
+ "383": 849621504.0,
+ "384": 849621504.0,
+ "385": 849621504.0,
+ "386": 849621504.0,
+ "387": 849621504.0,
+ "388": 849621504.0,
+ "389": 849621504.0,
+ "390": 849621504.0,
+ "391": 849621504.0,
+ "392": 849621504.0,
+ "393": 849621504.0,
+ "394": 849621504.0,
+ "395": 849621504.0,
+ "396": 849621504.0,
+ "397": 849621504.0,
+ "398": 849621504.0,
+ "399": 849621504.0,
+ "400": 849621504.0,
+ "401": 849621504.0,
+ "402": 849621504.0,
+ "403": 849621504.0,
+ "404": 849621504.0,
+ "405": 849621504.0,
+ "406": 849621504.0,
+ "407": 849621504.0,
+ "408": 849621504.0,
+ "409": 849621504.0,
+ "410": 849621504.0,
+ "411": 849621504.0,
+ "412": 849621504.0,
+ "413": 849621504.0,
+ "414": 849621504.0,
+ "415": 849621504.0,
+ "416": 849621504.0,
+ "417": 849621504.0,
+ "418": 849621504.0,
+ "419": 849621504.0,
+ "420": 849621504.0,
+ "421": 849621504.0,
+ "422": 849621504.0,
+ "423": 849621504.0,
+ "424": 849621504.0,
+ "425": 849621504.0,
+ "426": 849621504.0,
+ "427": 849621504.0,
+ "428": 849621504.0,
+ "429": 849621504.0,
+ "430": 849621504.0,
+ "431": 849621504.0,
+ "432": 849621504.0,
+ "433": 849621504.0,
+ "434": 849621504.0,
+ "435": 849621504.0,
+ "436": 849621504.0,
+ "437": 849621504.0,
+ "438": 849621504.0,
+ "439": 849621504.0,
+ "440": 849621504.0,
+ "441": 849621504.0,
+ "442": 849621504.0,
+ "443": 849621504.0,
+ "444": 849621504.0,
+ "445": 849621504.0,
+ "446": 849621504.0,
+ "447": 849621504.0,
+ "448": 849621504.0,
+ "449": 849621504.0,
+ "450": 849621504.0,
+ "451": 849621504.0,
+ "452": 849621504.0,
+ "453": 849621504.0,
+ "454": 849621504.0,
+ "455": 849621504.0,
+ "456": 849621504.0,
+ "457": 849621504.0,
+ "458": 849621504.0,
+ "459": 849621504.0,
+ "460": 849621504.0,
+ "461": 849621504.0,
+ "462": 849621504.0,
+ "463": 849621504.0,
+ "464": 849621504.0,
+ "465": 849621504.0,
+ "466": 849621504.0,
+ "467": 849621504.0,
+ "468": 849621504.0,
+ "469": 849621504.0,
+ "470": 849621504.0,
+ "471": 849621504.0,
+ "472": 849621504.0,
+ "473": 849621504.0,
+ "474": 849621504.0,
+ "475": 849621504.0,
+ "476": 849621504.0,
+ "477": 849621504.0,
+ "478": 849621504.0,
+ "479": 849621504.0,
+ "480": 849621504.0,
+ "481": 849621504.0,
+ "482": 849621504.0,
+ "483": 849621504.0,
+ "484": 849621504.0,
+ "485": 849621504.0,
+ "486": 849621504.0,
+ "487": 849621504.0,
+ "488": 849621504.0,
+ "489": 849621504.0,
+ "490": 849621504.0,
+ "491": 849621504.0,
+ "492": 849621504.0,
+ "493": 849621504.0,
+ "494": 849621504.0,
+ "495": 849621504.0,
+ "496": 849621504.0,
+ "497": 849621504.0,
+ "498": 849621504.0,
+ "499": 849621504.0,
+ "500": 849621504.0,
+ "501": 849621504.0,
+ "502": 849621504.0,
+ "503": 849621504.0,
+ "504": 849621504.0,
+ "505": 849621504.0,
+ "506": 849621504.0,
+ "507": 849621504.0,
+ "508": 849621504.0,
+ "509": 849621504.0,
+ "510": 849621504.0,
+ "511": 849621504.0,
+ "512": 849621504.0,
+ "513": 849621504.0,
+ "514": 849621504.0,
+ "515": 849621504.0,
+ "516": 849621504.0,
+ "517": 849621504.0,
+ "518": 849621504.0,
+ "519": 849621504.0,
+ "520": 849621504.0,
+ "521": 849621504.0,
+ "522": 849621504.0,
+ "523": 849621504.0,
+ "524": 849621504.0,
+ "525": 849621504.0,
+ "526": 849621504.0,
+ "527": 849621504.0,
+ "528": 849621504.0,
+ "529": 849621504.0,
+ "530": 849621504.0,
+ "531": 849621504.0,
+ "532": 849621504.0,
+ "533": 849621504.0,
+ "534": 849621504.0,
+ "535": 849621504.0,
+ "536": 849621504.0,
+ "537": 849621504.0,
+ "538": 849621504.0,
+ "539": 849621504.0,
+ "540": 849621504.0,
+ "541": 849621504.0,
+ "542": 849621504.0,
+ "543": 849621504.0,
+ "544": 849621504.0,
+ "545": 849621504.0,
+ "546": 849621504.0,
+ "547": 849621504.0,
+ "548": 849621504.0,
+ "549": 849621504.0,
+ "550": 849621504.0,
+ "551": 849621504.0,
+ "552": 849621504.0,
+ "553": 849621504.0,
+ "554": 849621504.0,
+ "555": 849621504.0,
+ "556": 849621504.0,
+ "557": 849621504.0,
+ "558": 849621504.0,
+ "559": 849621504.0,
+ "560": 849621504.0,
+ "561": 849621504.0,
+ "562": 849621504.0,
+ "563": 849621504.0,
+ "564": 849621504.0,
+ "565": 849621504.0,
+ "566": 849621504.0,
+ "567": 849621504.0,
+ "568": 849621504.0,
+ "569": 849621504.0,
+ "570": 849621504.0,
+ "571": 849621504.0,
+ "572": 849621504.0,
+ "573": 849621504.0,
+ "574": 849621504.0,
+ "575": 849621504.0,
+ "576": 849621504.0,
+ "577": 849621504.0,
+ "578": 849621504.0,
+ "579": 849621504.0,
+ "580": 849621504.0,
+ "581": 849621504.0,
+ "582": 849621504.0,
+ "583": 849621504.0,
+ "584": 849621504.0,
+ "585": 849621504.0,
+ "586": 849621504.0,
+ "587": 849621504.0,
+ "588": 849621504.0,
+ "589": 849621504.0,
+ "590": 849621504.0,
+ "591": 849621504.0,
+ "592": 849621504.0,
+ "593": 849621504.0,
+ "594": 849621504.0,
+ "595": 849621504.0,
+ "596": 849621504.0,
+ "597": 849621504.0,
+ "598": 849621504.0,
+ "599": 849621504.0,
+ "600": 849621504.0,
+ "601": 849621504.0,
+ "602": 849621504.0,
+ "603": 849621504.0,
+ "604": 849621504.0,
+ "605": 849621504.0,
+ "606": 849621504.0,
+ "607": 849621504.0,
+ "608": 849621504.0,
+ "609": 849621504.0,
+ "610": 849621504.0,
+ "611": 849621504.0,
+ "612": 849621504.0,
+ "613": 849621504.0,
+ "614": 849621504.0,
+ "615": 849621504.0,
+ "616": 849621504.0,
+ "617": 849621504.0,
+ "618": 849621504.0,
+ "619": 849621504.0,
+ "620": 849621504.0,
+ "621": 849621504.0,
+ "622": 849621504.0,
+ "623": 849621504.0,
+ "624": 849621504.0,
+ "625": 849621504.0,
+ "626": 849621504.0,
+ "627": 849621504.0,
+ "628": 849621504.0,
+ "629": 849621504.0,
+ "630": 849621504.0,
+ "631": 849621504.0,
+ "632": 849621504.0,
+ "633": 849621504.0,
+ "634": 849621504.0,
+ "635": 849621504.0,
+ "636": 849621504.0,
+ "637": 849621504.0,
+ "638": 849621504.0,
+ "639": 849621504.0,
+ "640": 849621504.0,
+ "641": 849621504.0,
+ "642": 849621504.0,
+ "643": 849621504.0,
+ "644": 849621504.0,
+ "645": 849621504.0,
+ "646": 849621504.0,
+ "647": 849621504.0,
+ "648": 849621504.0,
+ "649": 849621504.0,
+ "650": 849621504.0,
+ "651": 849621504.0,
+ "652": 849621504.0,
+ "653": 849621504.0,
+ "654": 849621504.0,
+ "655": 849621504.0,
+ "656": 849621504.0,
+ "657": 849621504.0,
+ "658": 849621504.0,
+ "659": 849621504.0,
+ "660": 849621504.0,
+ "661": 849621504.0,
+ "662": 849621504.0,
+ "663": 849621504.0,
+ "664": 849621504.0,
+ "665": 849621504.0,
+ "666": 849621504.0,
+ "667": 849621504.0,
+ "668": 849621504.0,
+ "669": 849621504.0,
+ "670": 849621504.0,
+ "671": 849621504.0,
+ "672": 849621504.0,
+ "673": 849621504.0,
+ "674": 849621504.0,
+ "675": 849621504.0,
+ "676": 849621504.0,
+ "677": 849621504.0,
+ "678": 849621504.0,
+ "679": 849621504.0,
+ "680": 849621504.0,
+ "681": 849621504.0,
+ "682": 849621504.0,
+ "683": 849621504.0,
+ "684": 849621504.0,
+ "685": 849621504.0,
+ "686": 849621504.0,
+ "687": 849621504.0,
+ "688": 849621504.0,
+ "689": 849621504.0,
+ "690": 849621504.0,
+ "691": 849621504.0,
+ "692": 849621504.0,
+ "693": 849621504.0,
+ "694": 849621504.0,
+ "695": 849621504.0,
+ "696": 849621504.0,
+ "697": 849621504.0,
+ "698": 849621504.0,
+ "699": 849621504.0,
+ "700": 849621504.0,
+ "701": 849621504.0,
+ "702": 849621504.0,
+ "703": 849621504.0,
+ "704": 849621504.0,
+ "705": 849621504.0,
+ "706": 849621504.0,
+ "707": 849621504.0,
+ "708": 849621504.0,
+ "709": 849621504.0,
+ "710": 849621504.0,
+ "711": 849621504.0,
+ "712": 849621504.0,
+ "713": 849621504.0,
+ "714": 849621504.0,
+ "715": 849621504.0,
+ "716": 849621504.0,
+ "717": 849621504.0,
+ "718": 849621504.0,
+ "719": 849621504.0,
+ "720": 849621504.0,
+ "721": 849621504.0,
+ "722": 849621504.0,
+ "723": 849621504.0,
+ "724": 849621504.0,
+ "725": 849621504.0,
+ "726": 849621504.0,
+ "727": 849621504.0,
+ "728": 849621504.0,
+ "729": 849621504.0,
+ "730": 849621504.0,
+ "731": 849621504.0,
+ "732": 849621504.0,
+ "733": 849621504.0,
+ "734": 849621504.0,
+ "735": 849621504.0,
+ "736": 849621504.0,
+ "737": 849621504.0,
+ "738": 849621504.0,
+ "739": 849621504.0,
+ "740": 849621504.0,
+ "741": 849621504.0,
+ "742": 849621504.0,
+ "743": 849621504.0,
+ "744": 849621504.0,
+ "745": 849621504.0,
+ "746": 849621504.0,
+ "747": 849621504.0,
+ "748": 849621504.0,
+ "749": 849621504.0,
+ "750": 849621504.0,
+ "751": 849621504.0,
+ "752": 849621504.0,
+ "753": 849621504.0,
+ "754": 849621504.0,
+ "755": 849621504.0,
+ "756": 849621504.0,
+ "757": 849621504.0,
+ "758": 849621504.0,
+ "759": 849621504.0,
+ "760": 849621504.0,
+ "761": 849621504.0,
+ "762": 849621504.0,
+ "763": 849621504.0,
+ "764": 849621504.0,
+ "765": 849621504.0,
+ "766": 849621504.0,
+ "767": 849621504.0,
+ "768": 849621504.0,
+ "769": 849621504.0,
+ "770": 849621504.0,
+ "771": 849621504.0,
+ "772": 849621504.0,
+ "773": 849621504.0,
+ "774": 849621504.0,
+ "775": 849621504.0,
+ "776": 849621504.0,
+ "777": 849621504.0,
+ "778": 849621504.0,
+ "779": 849621504.0,
+ "780": 849621504.0,
+ "781": 849621504.0,
+ "782": 849621504.0,
+ "783": 849621504.0,
+ "784": 849621504.0,
+ "785": 849621504.0,
+ "786": 849621504.0,
+ "787": 849621504.0,
+ "788": 849621504.0,
+ "789": 849621504.0,
+ "790": 849621504.0,
+ "791": 849621504.0,
+ "792": 849621504.0,
+ "793": 849621504.0,
+ "794": 849621504.0,
+ "795": 849621504.0,
+ "796": 849621504.0,
+ "797": 849621504.0,
+ "798": 849621504.0,
+ "799": 849621504.0,
+ "800": 849621504.0,
+ "801": 849621504.0,
+ "802": 849621504.0,
+ "803": 849621504.0,
+ "804": 849621504.0,
+ "805": 849621504.0,
+ "806": 849621504.0,
+ "807": 849621504.0,
+ "808": 849621504.0,
+ "809": 849621504.0,
+ "810": 849621504.0,
+ "811": 849621504.0,
+ "812": 849621504.0,
+ "813": 849621504.0,
+ "814": 849621504.0,
+ "815": 849621504.0,
+ "816": 849621504.0,
+ "817": 849621504.0,
+ "818": 849621504.0,
+ "819": 849621504.0,
+ "820": 849621504.0,
+ "821": 849621504.0,
+ "822": 849621504.0,
+ "823": 849621504.0,
+ "824": 849621504.0,
+ "825": 849621504.0,
+ "826": 849621504.0,
+ "827": 849621504.0,
+ "828": 849621504.0,
+ "829": 849621504.0,
+ "830": 849621504.0,
+ "831": 849621504.0,
+ "832": 849621504.0,
+ "833": 849621504.0,
+ "834": 849621504.0,
+ "835": 849621504.0,
+ "836": 849621504.0,
+ "837": 849621504.0,
+ "838": 849621504.0,
+ "839": 849621504.0,
+ "840": 849621504.0,
+ "841": 849621504.0,
+ "842": 849621504.0,
+ "843": 849621504.0,
+ "844": 849621504.0,
+ "845": 849621504.0,
+ "846": 849621504.0,
+ "847": 849621504.0,
+ "848": 849621504.0,
+ "849": 849621504.0,
+ "850": 849621504.0,
+ "851": 849621504.0,
+ "852": 849621504.0,
+ "853": 849621504.0,
+ "854": 849621504.0,
+ "855": 849621504.0,
+ "856": 849621504.0,
+ "857": 849621504.0,
+ "858": 849621504.0,
+ "859": 849621504.0,
+ "860": 849621504.0,
+ "861": 849621504.0,
+ "862": 849621504.0,
+ "863": 849621504.0,
+ "864": 849621504.0,
+ "865": 849621504.0,
+ "866": 849621504.0,
+ "867": 849621504.0,
+ "868": 849621504.0,
+ "869": 849621504.0,
+ "870": 849621504.0,
+ "871": 849621504.0,
+ "872": 849621504.0,
+ "873": 849621504.0,
+ "874": 849621504.0,
+ "875": 849621504.0,
+ "876": 849621504.0,
+ "877": 849621504.0,
+ "878": 849621504.0,
+ "879": 849621504.0,
+ "880": 849621504.0,
+ "881": 849621504.0,
+ "882": 849621504.0,
+ "883": 849621504.0,
+ "884": 849621504.0,
+ "885": 849621504.0,
+ "886": 849621504.0,
+ "887": 849621504.0,
+ "888": 849621504.0,
+ "889": 849621504.0,
+ "890": 849621504.0,
+ "891": 849621504.0,
+ "892": 849621504.0,
+ "893": 849621504.0,
+ "894": 849621504.0,
+ "895": 849621504.0,
+ "896": 849621504.0,
+ "897": 849621504.0,
+ "898": 849621504.0,
+ "899": 849621504.0,
+ "900": 849621504.0,
+ "901": 849621504.0,
+ "902": 849621504.0,
+ "903": 849621504.0,
+ "904": 849621504.0,
+ "905": 849621504.0,
+ "906": 849621504.0,
+ "907": 849621504.0,
+ "908": 849621504.0,
+ "909": 849621504.0,
+ "910": 849621504.0,
+ "911": 849621504.0,
+ "912": 849621504.0,
+ "913": 849621504.0,
+ "914": 849621504.0,
+ "915": 849621504.0,
+ "916": 849621504.0,
+ "917": 849621504.0,
+ "918": 849621504.0,
+ "919": 849621504.0,
+ "920": 849621504.0,
+ "921": 849621504.0,
+ "922": 849621504.0,
+ "923": 849621504.0,
+ "924": 849621504.0,
+ "925": 849621504.0,
+ "926": 849621504.0,
+ "927": 849621504.0,
+ "928": 849621504.0,
+ "929": 849621504.0,
+ "930": 849621504.0,
+ "931": 849621504.0,
+ "932": 849621504.0,
+ "933": 849621504.0,
+ "934": 849621504.0,
+ "935": 849621504.0,
+ "936": 849621504.0,
+ "937": 849621504.0,
+ "938": 849621504.0,
+ "939": 849621504.0,
+ "940": 849621504.0,
+ "941": 849621504.0,
+ "942": 849621504.0,
+ "943": 849621504.0,
+ "944": 849621504.0,
+ "945": 849621504.0,
+ "946": 849621504.0,
+ "947": 849621504.0,
+ "948": 849621504.0,
+ "949": 849621504.0,
+ "950": 849621504.0,
+ "951": 849621504.0,
+ "952": 849621504.0,
+ "953": 849621504.0,
+ "954": 849621504.0,
+ "955": 849621504.0,
+ "956": 849621504.0,
+ "957": 849621504.0,
+ "958": 849621504.0,
+ "959": 849621504.0,
+ "960": 849621504.0,
+ "961": 849621504.0,
+ "962": 849621504.0,
+ "963": 849621504.0,
+ "964": 849621504.0,
+ "965": 849621504.0,
+ "966": 849621504.0,
+ "967": 849621504.0,
+ "968": 849621504.0,
+ "969": 849621504.0,
+ "970": 849621504.0,
+ "971": 849621504.0,
+ "972": 849621504.0,
+ "973": 849621504.0,
+ "974": 849621504.0,
+ "975": 849621504.0,
+ "976": 849621504.0,
+ "977": 849621504.0,
+ "978": 849621504.0,
+ "979": 849621504.0,
+ "980": 849621504.0,
+ "981": 849621504.0,
+ "982": 849621504.0,
+ "983": 849621504.0,
+ "984": 849621504.0,
+ "985": 849621504.0,
+ "986": 849621504.0,
+ "987": 849621504.0,
+ "988": 849621504.0,
+ "989": 849621504.0,
+ "990": 849621504.0,
+ "991": 849621504.0,
+ "992": 849621504.0,
+ "993": 849621504.0,
+ "994": 849621504.0,
+ "995": 849621504.0,
+ "996": 849621504.0,
+ "997": 849621504.0,
+ "998": 849621504.0,
+ "999": 849621504.0,
+ "1000": 849621504.0,
+ "1001": 849621504.0,
+ "1002": 849621504.0,
+ "1003": 849621504.0,
+ "1004": 849621504.0,
+ "1005": 849621504.0,
+ "1006": 849621504.0,
+ "1007": 849621504.0,
+ "1008": 849621504.0,
+ "1009": 849621504.0,
+ "1010": 849621504.0,
+ "1011": 849621504.0,
+ "1012": 849621504.0,
+ "1013": 849621504.0,
+ "1014": 849621504.0,
+ "1015": 849621504.0,
+ "1016": 849621504.0,
+ "1017": 849621504.0,
+ "1018": 849621504.0,
+ "1019": 849621504.0,
+ "1020": 849621504.0,
+ "1021": 849621504.0,
+ "1022": 849621504.0,
+ "1023": 849621504.0,
+ "1024": 849621504.0,
+ "1025": 849621504.0,
+ "1026": 849621504.0,
+ "1027": 849621504.0,
+ "1028": 849621504.0,
+ "1029": 849621504.0,
+ "1030": 849621504.0,
+ "1031": 849621504.0,
+ "1032": 849621504.0,
+ "1033": 849621504.0,
+ "1034": 849621504.0,
+ "1035": 849621504.0,
+ "1036": 849621504.0,
+ "1037": 849621504.0,
+ "1038": 849621504.0,
+ "1039": 849621504.0,
+ "1040": 849621504.0,
+ "1041": 849621504.0,
+ "1042": 849621504.0,
+ "1043": 849621504.0,
+ "1044": 849621504.0,
+ "1045": 849621504.0,
+ "1046": 849621504.0,
+ "1047": 849621504.0,
+ "1048": 849621504.0,
+ "1049": 849621504.0,
+ "1050": 849621504.0,
+ "1051": 849621504.0,
+ "1052": 849621504.0,
+ "1053": 849621504.0,
+ "1054": 849621504.0,
+ "1055": 849621504.0,
+ "1056": 849621504.0,
+ "1057": 849621504.0,
+ "1058": 849621504.0,
+ "1059": 849621504.0,
+ "1060": 849621504.0,
+ "1061": 849621504.0,
+ "1062": 849621504.0,
+ "1063": 849621504.0,
+ "1064": 849621504.0,
+ "1065": 849621504.0,
+ "1066": 849621504.0,
+ "1067": 849621504.0,
+ "1068": 849621504.0,
+ "1069": 849621504.0,
+ "1070": 849621504.0,
+ "1071": 849621504.0,
+ "1072": 849621504.0,
+ "1073": 849621504.0,
+ "1074": 849621504.0,
+ "1075": 849621504.0,
+ "1076": 849621504.0,
+ "1077": 849621504.0,
+ "1078": 849621504.0,
+ "1079": 849621504.0,
+ "1080": 849621504.0,
+ "1081": 849621504.0,
+ "1082": 849621504.0,
+ "1083": 849621504.0,
+ "1084": 849621504.0,
+ "1085": 849621504.0,
+ "1086": 849621504.0,
+ "1087": 849621504.0,
+ "1088": 849621504.0,
+ "1089": 849621504.0,
+ "1090": 849621504.0,
+ "1091": 849621504.0,
+ "1092": 849621504.0,
+ "1093": 849621504.0,
+ "1094": 849621504.0,
+ "1095": 849621504.0,
+ "1096": 849621504.0,
+ "1097": 849621504.0,
+ "1098": 849621504.0,
+ "1099": 849621504.0,
+ "1100": 849621504.0,
+ "1101": 849621504.0,
+ "1102": 849621504.0,
+ "1103": 849621504.0,
+ "1104": 849621504.0,
+ "1105": 849621504.0,
+ "1106": 849621504.0,
+ "1107": 849621504.0,
+ "1108": 849621504.0,
+ "1109": 849621504.0,
+ "1110": 849621504.0,
+ "1111": 849621504.0,
+ "1112": 849621504.0,
+ "1113": 849621504.0,
+ "1114": 849621504.0,
+ "1115": 849621504.0,
+ "1116": 849621504.0,
+ "1117": 849621504.0,
+ "1118": 849621504.0,
+ "1119": 849621504.0,
+ "1120": 849621504.0,
+ "1121": 849621504.0,
+ "1122": 849621504.0,
+ "1123": 849621504.0,
+ "1124": 849621504.0,
+ "1125": 849621504.0,
+ "1126": 849621504.0,
+ "1127": 849621504.0,
+ "1128": 849621504.0,
+ "1129": 849621504.0,
+ "1130": 849621504.0,
+ "1131": 849621504.0,
+ "1132": 849621504.0,
+ "1133": 849621504.0,
+ "1134": 849621504.0,
+ "1135": 849621504.0,
+ "1136": 849621504.0,
+ "1137": 849621504.0,
+ "1138": 849621504.0,
+ "1139": 849621504.0,
+ "1140": 849621504.0,
+ "1141": 849621504.0,
+ "1142": 849621504.0,
+ "1143": 849621504.0,
+ "1144": 849621504.0,
+ "1145": 849621504.0,
+ "1146": 849621504.0,
+ "1147": 849621504.0,
+ "1148": 849621504.0,
+ "1149": 849621504.0,
+ "1150": 849621504.0,
+ "1151": 849621504.0,
+ "1152": 849621504.0,
+ "1153": 849621504.0,
+ "1154": 849621504.0,
+ "1155": 849621504.0,
+ "1156": 849621504.0,
+ "1157": 849621504.0,
+ "1158": 849621504.0,
+ "1159": 849621504.0,
+ "1160": 849621504.0,
+ "1161": 849621504.0,
+ "1162": 849621504.0,
+ "1163": 849621504.0,
+ "1164": 849621504.0,
+ "1165": 849621504.0,
+ "1166": 849621504.0,
+ "1167": 849621504.0,
+ "1168": 849621504.0,
+ "1169": 849621504.0,
+ "1170": 849621504.0,
+ "1171": 849621504.0,
+ "1172": 849621504.0,
+ "1173": 849621504.0,
+ "1174": 849621504.0,
+ "1175": 849621504.0,
+ "1176": 849621504.0,
+ "1177": 849621504.0,
+ "1178": 849621504.0,
+ "1179": 849621504.0,
+ "1180": 849621504.0,
+ "1181": 849621504.0,
+ "1182": 849621504.0,
+ "1183": 849621504.0,
+ "1184": 849621504.0,
+ "1185": 849621504.0,
+ "1186": 849621504.0,
+ "1187": 849621504.0,
+ "1188": 849621504.0,
+ "1189": 849621504.0,
+ "1190": 849621504.0,
+ "1191": 849621504.0,
+ "1192": 849621504.0,
+ "1193": 849621504.0,
+ "1194": 849621504.0,
+ "1195": 849621504.0,
+ "1196": 849621504.0,
+ "1197": 849621504.0,
+ "1198": 849621504.0,
+ "1199": 849621504.0,
+ "1200": 849621504.0,
+ "1201": 849621504.0,
+ "1202": 849621504.0,
+ "1203": 849621504.0,
+ "1204": 849621504.0,
+ "1205": 849621504.0,
+ "1206": 849621504.0,
+ "1207": 849621504.0,
+ "1208": 849621504.0,
+ "1209": 849621504.0,
+ "1210": 849621504.0,
+ "1211": 849621504.0,
+ "1212": 849621504.0,
+ "1213": 849621504.0,
+ "1214": 849621504.0,
+ "1215": 849621504.0,
+ "1216": 849621504.0,
+ "1217": 849621504.0,
+ "1218": 849621504.0,
+ "1219": 849621504.0,
+ "1220": 849621504.0,
+ "1221": 849621504.0,
+ "1222": 849621504.0,
+ "1223": 849621504.0,
+ "1224": 849621504.0,
+ "1225": 849621504.0,
+ "1226": 849621504.0,
+ "1227": 849621504.0,
+ "1228": 849621504.0,
+ "1229": 849621504.0,
+ "1230": 849621504.0,
+ "1231": 849621504.0,
+ "1232": 849621504.0,
+ "1233": 849621504.0,
+ "1234": 849621504.0,
+ "1235": 849621504.0,
+ "1236": 849621504.0,
+ "1237": 849621504.0,
+ "1238": 849621504.0,
+ "1239": 849621504.0,
+ "1240": 849621504.0,
+ "1241": 849621504.0,
+ "1242": 849621504.0,
+ "1243": 849621504.0,
+ "1244": 849621504.0,
+ "1245": 849621504.0,
+ "1246": 849621504.0,
+ "1247": 849621504.0,
+ "1248": 849621504.0,
+ "1249": 849621504.0,
+ "1250": 849621504.0,
+ "1251": 849621504.0,
+ "1252": 849621504.0,
+ "1253": 849621504.0,
+ "1254": 849621504.0,
+ "1255": 849621504.0,
+ "1256": 849621504.0,
+ "1257": 849621504.0,
+ "1258": 849621504.0,
+ "1259": 849621504.0,
+ "1260": 849621504.0,
+ "1261": 849621504.0,
+ "1262": 849621504.0,
+ "1263": 849621504.0,
+ "1264": 849621504.0,
+ "1265": 849621504.0,
+ "1266": 849621504.0,
+ "1267": 849621504.0,
+ "1268": 849621504.0,
+ "1269": 849621504.0,
+ "1270": 849621504.0,
+ "1271": 849621504.0,
+ "1272": 849621504.0,
+ "1273": 849621504.0,
+ "1274": 849621504.0,
+ "1275": 849621504.0,
+ "1276": 849621504.0,
+ "1277": 849621504.0,
+ "1278": 849621504.0,
+ "1279": 849621504.0,
+ "1280": 849621504.0,
+ "1281": 849621504.0,
+ "1282": 849621504.0,
+ "1283": 849621504.0,
+ "1284": 849621504.0,
+ "1285": 849621504.0,
+ "1286": 849621504.0,
+ "1287": 849621504.0,
+ "1288": 849621504.0,
+ "1289": 849621504.0,
+ "1290": 849621504.0,
+ "1291": 849621504.0,
+ "1292": 849621504.0,
+ "1293": 849621504.0,
+ "1294": 849621504.0,
+ "1295": 849621504.0,
+ "1296": 849621504.0,
+ "1297": 849621504.0,
+ "1298": 849621504.0,
+ "1299": 849621504.0,
+ "1300": 849621504.0,
+ "1301": 849621504.0,
+ "1302": 849621504.0,
+ "1303": 849621504.0,
+ "1304": 849621504.0,
+ "1305": 849621504.0,
+ "1306": 849621504.0,
+ "1307": 849621504.0,
+ "1308": 849621504.0,
+ "1309": 849621504.0,
+ "1310": 849621504.0,
+ "1311": 849621504.0,
+ "1312": 849621504.0,
+ "1313": 849621504.0,
+ "1314": 849621504.0,
+ "1315": 849621504.0,
+ "1316": 849621504.0,
+ "1317": 849621504.0,
+ "1318": 849621504.0,
+ "1319": 849621504.0,
+ "1320": 849621504.0,
+ "1321": 849621504.0,
+ "1322": 849621504.0,
+ "1323": 849621504.0,
+ "1324": 849621504.0,
+ "1325": 849621504.0,
+ "1326": 849621504.0,
+ "1327": 849621504.0,
+ "1328": 849621504.0,
+ "1329": 849621504.0,
+ "1330": 849621504.0,
+ "1331": 849621504.0,
+ "1332": 849621504.0,
+ "1333": 849621504.0,
+ "1334": 849621504.0,
+ "1335": 849621504.0,
+ "1336": 849621504.0,
+ "1337": 849621504.0,
+ "1338": 849621504.0,
+ "1339": 849621504.0,
+ "1340": 849621504.0,
+ "1341": 849621504.0,
+ "1342": 849621504.0,
+ "1343": 849621504.0,
+ "1344": 849621504.0,
+ "1345": 849621504.0,
+ "1346": 849621504.0,
+ "1347": 849621504.0,
+ "1348": 849621504.0,
+ "1349": 849621504.0,
+ "1350": 849621504.0,
+ "1351": 849621504.0,
+ "1352": 849621504.0,
+ "1353": 849621504.0,
+ "1354": 849621504.0,
+ "1355": 849621504.0,
+ "1356": 849621504.0,
+ "1357": 849621504.0,
+ "1358": 849621504.0,
+ "1359": 849621504.0,
+ "1360": 849621504.0,
+ "1361": 849621504.0,
+ "1362": 849621504.0,
+ "1363": 849621504.0,
+ "1364": 849621504.0,
+ "1365": 849621504.0,
+ "1366": 849621504.0,
+ "1367": 849621504.0,
+ "1368": 849621504.0,
+ "1369": 849621504.0,
+ "1370": 849621504.0,
+ "1371": 849621504.0,
+ "1372": 849621504.0,
+ "1373": 849621504.0,
+ "1374": 849621504.0,
+ "1375": 849621504.0,
+ "1376": 849621504.0,
+ "1377": 849621504.0,
+ "1378": 849621504.0,
+ "1379": 849621504.0,
+ "1380": 849621504.0,
+ "1381": 849621504.0,
+ "1382": 849621504.0,
+ "1383": 849621504.0,
+ "1384": 849621504.0,
+ "1385": 849621504.0,
+ "1386": 849621504.0,
+ "1387": 849621504.0,
+ "1388": 849621504.0,
+ "1389": 849621504.0,
+ "1390": 849621504.0,
+ "1391": 849621504.0,
+ "1392": 849621504.0,
+ "1393": 849621504.0,
+ "1394": 849621504.0,
+ "1395": 849621504.0,
+ "1396": 849621504.0,
+ "1397": 849621504.0,
+ "1398": 849621504.0,
+ "1399": 849621504.0,
+ "1400": 849621504.0,
+ "1401": 849621504.0,
+ "1402": 849621504.0,
+ "1403": 849621504.0,
+ "1404": 849621504.0,
+ "1405": 849621504.0,
+ "1406": 849621504.0,
+ "1407": 849621504.0,
+ "1408": 849621504.0,
+ "1409": 849621504.0,
+ "1410": 849621504.0,
+ "1411": 849621504.0,
+ "1412": 849621504.0,
+ "1413": 849621504.0,
+ "1414": 849621504.0,
+ "1415": 849621504.0,
+ "1416": 849621504.0,
+ "1417": 849621504.0,
+ "1418": 849621504.0,
+ "1419": 849621504.0,
+ "1420": 849621504.0,
+ "1421": 849621504.0,
+ "1422": 849621504.0,
+ "1423": 849621504.0,
+ "1424": 849621504.0,
+ "1425": 849621504.0,
+ "1426": 849621504.0,
+ "1427": 849621504.0,
+ "1428": 849621504.0,
+ "1429": 849621504.0,
+ "1430": 849621504.0,
+ "1431": 849621504.0,
+ "1432": 849621504.0,
+ "1433": 849621504.0,
+ "1434": 849621504.0,
+ "1435": 849621504.0,
+ "1436": 849621504.0,
+ "1437": 849621504.0,
+ "1438": 849621504.0,
+ "1439": 849621504.0,
+ "1440": 849621504.0,
+ "1441": 849621504.0,
+ "1442": 849621504.0,
+ "1443": 849621504.0,
+ "1444": 849621504.0,
+ "1445": 849621504.0,
+ "1446": 849621504.0,
+ "1447": 849621504.0,
+ "1448": 849621504.0,
+ "1449": 849621504.0,
+ "1450": 849621504.0,
+ "1451": 849621504.0,
+ "1452": 849621504.0,
+ "1453": 849621504.0,
+ "1454": 849621504.0,
+ "1455": 849621504.0,
+ "1456": 849621504.0,
+ "1457": 849621504.0,
+ "1458": 849621504.0,
+ "1459": 849621504.0,
+ "1460": 849621504.0,
+ "1461": 849621504.0,
+ "1462": 849621504.0,
+ "1463": 849621504.0,
+ "1464": 849621504.0,
+ "1465": 849621504.0,
+ "1466": 849621504.0,
+ "1467": 849621504.0,
+ "1468": 849621504.0,
+ "1469": 849621504.0,
+ "1470": 849621504.0,
+ "1471": 849621504.0,
+ "1472": 849621504.0,
+ "1473": 849621504.0,
+ "1474": 849621504.0,
+ "1475": 849621504.0,
+ "1476": 849621504.0,
+ "1477": 849621504.0,
+ "1478": 849621504.0,
+ "1479": 849621504.0,
+ "1480": 849621504.0,
+ "1481": 849621504.0,
+ "1482": 849621504.0,
+ "1483": 849621504.0,
+ "1484": 849621504.0,
+ "1485": 849621504.0,
+ "1486": 849621504.0,
+ "1487": 849621504.0,
+ "1488": 849621504.0,
+ "1489": 849621504.0,
+ "1490": 849621504.0,
+ "1491": 849621504.0,
+ "1492": 849621504.0,
+ "1493": 849621504.0,
+ "1494": 849621504.0,
+ "1495": 849621504.0,
+ "1496": 849621504.0,
+ "1497": 849621504.0,
+ "1498": 849621504.0,
+ "1499": 849621504.0,
+ "1500": 849621504.0,
+ "1501": 849621504.0,
+ "1502": 849621504.0,
+ "1503": 849621504.0,
+ "1504": 849621504.0,
+ "1505": 849621504.0,
+ "1506": 849621504.0,
+ "1507": 849621504.0,
+ "1508": 849621504.0,
+ "1509": 849621504.0,
+ "1510": 849621504.0,
+ "1511": 849621504.0,
+ "1512": 849621504.0,
+ "1513": 849621504.0,
+ "1514": 849621504.0,
+ "1515": 849621504.0,
+ "1516": 849621504.0,
+ "1517": 849621504.0,
+ "1518": 849621504.0,
+ "1519": 849621504.0,
+ "1520": 849621504.0,
+ "1521": 849621504.0,
+ "1522": 849621504.0,
+ "1523": 849621504.0,
+ "1524": 849621504.0,
+ "1525": 849621504.0,
+ "1526": 849621504.0,
+ "1527": 849621504.0,
+ "1528": 849621504.0,
+ "1529": 849621504.0,
+ "1530": 849621504.0,
+ "1531": 849621504.0,
+ "1532": 849621504.0,
+ "1533": 849621504.0,
+ "1534": 849621504.0,
+ "1535": 849621504.0,
+ "1536": 849621504.0,
+ "1537": 849621504.0,
+ "1538": 849621504.0,
+ "1539": 849621504.0,
+ "1540": 849621504.0,
+ "1541": 849621504.0,
+ "1542": 849621504.0,
+ "1543": 849621504.0,
+ "1544": 849621504.0,
+ "1545": 849621504.0,
+ "1546": 849621504.0,
+ "1547": 849621504.0,
+ "1548": 849621504.0,
+ "1549": 849621504.0,
+ "1550": 849621504.0,
+ "1551": 849621504.0,
+ "1552": 849621504.0,
+ "1553": 849621504.0,
+ "1554": 849621504.0,
+ "1555": 849621504.0,
+ "1556": 849621504.0,
+ "1557": 849621504.0,
+ "1558": 849621504.0,
+ "1559": 849621504.0,
+ "1560": 849621504.0,
+ "1561": 849621504.0,
+ "1562": 849621504.0,
+ "1563": 849621504.0,
+ "1564": 849621504.0,
+ "1565": 849621504.0,
+ "1566": 849621504.0,
+ "1567": 849621504.0,
+ "1568": 849621504.0,
+ "1569": 849621504.0,
+ "1570": 849621504.0,
+ "1571": 849621504.0,
+ "1572": 849621504.0,
+ "1573": 849621504.0,
+ "1574": 849621504.0,
+ "1575": 849621504.0,
+ "1576": 849621504.0,
+ "1577": 849621504.0,
+ "1578": 849621504.0,
+ "1579": 849621504.0,
+ "1580": 849621504.0,
+ "1581": 849621504.0,
+ "1582": 849621504.0,
+ "1583": 849621504.0,
+ "1584": 849621504.0,
+ "1585": 849621504.0,
+ "1586": 849621504.0,
+ "1587": 849621504.0,
+ "1588": 849621504.0,
+ "1589": 849621504.0,
+ "1590": 849621504.0,
+ "1591": 849621504.0,
+ "1592": 849621504.0,
+ "1593": 849621504.0,
+ "1594": 849621504.0,
+ "1595": 849621504.0,
+ "1596": 849621504.0,
+ "1597": 849621504.0,
+ "1598": 849621504.0,
+ "1599": 849621504.0,
+ "1600": 849621504.0,
+ "1601": 849621504.0,
+ "1602": 849621504.0,
+ "1603": 849621504.0,
+ "1604": 849621504.0,
+ "1605": 849621504.0,
+ "1606": 849621504.0,
+ "1607": 849621504.0,
+ "1608": 849621504.0,
+ "1609": 849621504.0,
+ "1610": 849621504.0,
+ "1611": 849621504.0,
+ "1612": 849621504.0,
+ "1613": 849621504.0,
+ "1614": 849621504.0,
+ "1615": 849621504.0,
+ "1616": 849621504.0,
+ "1617": 849621504.0,
+ "1618": 849621504.0,
+ "1619": 849621504.0,
+ "1620": 849621504.0,
+ "1621": 849621504.0,
+ "1622": 849621504.0,
+ "1623": 849621504.0,
+ "1624": 849621504.0,
+ "1625": 849621504.0,
+ "1626": 849621504.0,
+ "1627": 849621504.0,
+ "1628": 849621504.0,
+ "1629": 849621504.0,
+ "1630": 849621504.0,
+ "1631": 849621504.0,
+ "1632": 849621504.0,
+ "1633": 849621504.0,
+ "1634": 849621504.0,
+ "1635": 849621504.0,
+ "1636": 849621504.0,
+ "1637": 849621504.0,
+ "1638": 849621504.0,
+ "1639": 849621504.0,
+ "1640": 849621504.0,
+ "1641": 849621504.0,
+ "1642": 849621504.0,
+ "1643": 849621504.0,
+ "1644": 849621504.0,
+ "1645": 849621504.0,
+ "1646": 849621504.0,
+ "1647": 849621504.0,
+ "1648": 849621504.0,
+ "1649": 849621504.0,
+ "1650": 849621504.0,
+ "1651": 849621504.0,
+ "1652": 849621504.0,
+ "1653": 849621504.0,
+ "1654": 849621504.0,
+ "1655": 849621504.0,
+ "1656": 849621504.0,
+ "1657": 849621504.0,
+ "1658": 849621504.0,
+ "1659": 849621504.0,
+ "1660": 849621504.0,
+ "1661": 849621504.0,
+ "1662": 849621504.0,
+ "1663": 849621504.0,
+ "1664": 849621504.0,
+ "1665": 849621504.0,
+ "1666": 849621504.0,
+ "1667": 849621504.0,
+ "1668": 849621504.0,
+ "1669": 849621504.0,
+ "1670": 849621504.0,
+ "1671": 849621504.0,
+ "1672": 849621504.0,
+ "1673": 849621504.0,
+ "1674": 849621504.0,
+ "1675": 849621504.0,
+ "1676": 849621504.0,
+ "1677": 849621504.0,
+ "1678": 849621504.0,
+ "1679": 849621504.0,
+ "1680": 849621504.0,
+ "1681": 849621504.0,
+ "1682": 849621504.0,
+ "1683": 849621504.0,
+ "1684": 849621504.0,
+ "1685": 849621504.0,
+ "1686": 849621504.0,
+ "1687": 849621504.0,
+ "1688": 849621504.0,
+ "1689": 849621504.0,
+ "1690": 849621504.0,
+ "1691": 849621504.0,
+ "1692": 849621504.0,
+ "1693": 849621504.0,
+ "1694": 849621504.0,
+ "1695": 849621504.0,
+ "1696": 849621504.0,
+ "1697": 849621504.0,
+ "1698": 849621504.0,
+ "1699": 849621504.0,
+ "1700": 849621504.0,
+ "1701": 849621504.0,
+ "1702": 849621504.0,
+ "1703": 849621504.0,
+ "1704": 849621504.0,
+ "1705": 849621504.0,
+ "1706": 849621504.0,
+ "1707": 849621504.0,
+ "1708": 849621504.0,
+ "1709": 849621504.0,
+ "1710": 849621504.0,
+ "1711": 849621504.0,
+ "1712": 849621504.0,
+ "1713": 849621504.0,
+ "1714": 849621504.0,
+ "1715": 849621504.0,
+ "1716": 849621504.0,
+ "1717": 849621504.0,
+ "1718": 849621504.0,
+ "1719": 849621504.0,
+ "1720": 849621504.0,
+ "1721": 849621504.0,
+ "1722": 849621504.0,
+ "1723": 849621504.0,
+ "1724": 849621504.0,
+ "1725": 849621504.0,
+ "1726": 849621504.0,
+ "1727": 849621504.0,
+ "1728": 849621504.0,
+ "1729": 849621504.0,
+ "1730": 849621504.0,
+ "1731": 849621504.0,
+ "1732": 849621504.0,
+ "1733": 849621504.0,
+ "1734": 849621504.0,
+ "1735": 849621504.0,
+ "1736": 849621504.0,
+ "1737": 849621504.0,
+ "1738": 849621504.0,
+ "1739": 849621504.0,
+ "1740": 849621504.0,
+ "1741": 849621504.0,
+ "1742": 849621504.0,
+ "1743": 849621504.0,
+ "1744": 849621504.0,
+ "1745": 849621504.0,
+ "1746": 849621504.0,
+ "1747": 849621504.0,
+ "1748": 849621504.0,
+ "1749": 849621504.0,
+ "1750": 849621504.0,
+ "1751": 849621504.0,
+ "1752": 849621504.0,
+ "1753": 849621504.0,
+ "1754": 849621504.0,
+ "1755": 849621504.0,
+ "1756": 849621504.0,
+ "1757": 849621504.0,
+ "1758": 849621504.0,
+ "1759": 849621504.0,
+ "1760": 849621504.0,
+ "1761": 849621504.0,
+ "1762": 849621504.0,
+ "1763": 849621504.0,
+ "1764": 849621504.0,
+ "1765": 849621504.0,
+ "1766": 849621504.0,
+ "1767": 849621504.0,
+ "1768": 849621504.0,
+ "1769": 849621504.0,
+ "1770": 849621504.0,
+ "1771": 849621504.0,
+ "1772": 849621504.0,
+ "1773": 849621504.0,
+ "1774": 849621504.0,
+ "1775": 849621504.0,
+ "1776": 849621504.0,
+ "1777": 849621504.0,
+ "1778": 849621504.0,
+ "1779": 849621504.0,
+ "1780": 849621504.0,
+ "1781": 849621504.0,
+ "1782": 849621504.0,
+ "1783": 849621504.0,
+ "1784": 849621504.0,
+ "1785": 849621504.0,
+ "1786": 849621504.0,
+ "1787": 849621504.0,
+ "1788": 849621504.0,
+ "1789": 849621504.0,
+ "1790": 849621504.0,
+ "1791": 849621504.0,
+ "1792": 849621504.0,
+ "1793": 849621504.0,
+ "1794": 849621504.0,
+ "1795": 849621504.0,
+ "1796": 849621504.0,
+ "1797": 849621504.0,
+ "1798": 849621504.0,
+ "1799": 849621504.0,
+ "1800": 849621504.0,
+ "1801": 849621504.0,
+ "1802": 849621504.0,
+ "1803": 849621504.0,
+ "1804": 849621504.0,
+ "1805": 849621504.0,
+ "1806": 849621504.0,
+ "1807": 849621504.0,
+ "1808": 849621504.0,
+ "1809": 849621504.0,
+ "1810": 849621504.0,
+ "1811": 849621504.0,
+ "1812": 849621504.0,
+ "1813": 849621504.0,
+ "1814": 849621504.0,
+ "1815": 849621504.0,
+ "1816": 849621504.0,
+ "1817": 849621504.0,
+ "1818": 849621504.0,
+ "1819": 849621504.0,
+ "1820": 849621504.0,
+ "1821": 849621504.0,
+ "1822": 849621504.0,
+ "1823": 849621504.0,
+ "1824": 849621504.0,
+ "1825": 849621504.0,
+ "1826": 849621504.0,
+ "1827": 849621504.0,
+ "1828": 849621504.0,
+ "1829": 849621504.0,
+ "1830": 849621504.0,
+ "1831": 849621504.0,
+ "1832": 849621504.0,
+ "1833": 849621504.0,
+ "1834": 849621504.0,
+ "1835": 849621504.0,
+ "1836": 849621504.0,
+ "1837": 849621504.0,
+ "1838": 849621504.0,
+ "1839": 849621504.0,
+ "1840": 849621504.0,
+ "1841": 849621504.0,
+ "1842": 849621504.0,
+ "1843": 849621504.0,
+ "1844": 849621504.0,
+ "1845": 849621504.0,
+ "1846": 849621504.0,
+ "1847": 849621504.0,
+ "1848": 849621504.0,
+ "1849": 849621504.0,
+ "1850": 849621504.0,
+ "1851": 849621504.0,
+ "1852": 849621504.0,
+ "1853": 849621504.0,
+ "1854": 849621504.0,
+ "1855": 849621504.0,
+ "1856": 849621504.0,
+ "1857": 849621504.0,
+ "1858": 849621504.0,
+ "1859": 849621504.0,
+ "1860": 849621504.0,
+ "1861": 849621504.0,
+ "1862": 849621504.0,
+ "1863": 849621504.0,
+ "1864": 849621504.0,
+ "1865": 849621504.0,
+ "1866": 849621504.0,
+ "1867": 849621504.0,
+ "1868": 849621504.0,
+ "1869": 849621504.0,
+ "1870": 849621504.0,
+ "1871": 849621504.0,
+ "1872": 849621504.0,
+ "1873": 849621504.0,
+ "1874": 849621504.0,
+ "1875": 849621504.0,
+ "1876": 849621504.0,
+ "1877": 849621504.0,
+ "1878": 849621504.0,
+ "1879": 849621504.0,
+ "1880": 849621504.0,
+ "1881": 849621504.0,
+ "1882": 849621504.0,
+ "1883": 849621504.0,
+ "1884": 849621504.0,
+ "1885": 849621504.0,
+ "1886": 849621504.0,
+ "1887": 849621504.0,
+ "1888": 849621504.0,
+ "1889": 849621504.0,
+ "1890": 849621504.0,
+ "1891": 849621504.0,
+ "1892": 849621504.0,
+ "1893": 849621504.0,
+ "1894": 849621504.0,
+ "1895": 849621504.0,
+ "1896": 849621504.0,
+ "1897": 849621504.0,
+ "1898": 849621504.0,
+ "1899": 849621504.0,
+ "1900": 849621504.0,
+ "1901": 849621504.0,
+ "1902": 849621504.0,
+ "1903": 849621504.0,
+ "1904": 849621504.0,
+ "1905": 849621504.0,
+ "1906": 849621504.0,
+ "1907": 849621504.0,
+ "1908": 849621504.0,
+ "1909": 849621504.0,
+ "1910": 849621504.0,
+ "1911": 849621504.0,
+ "1912": 849621504.0,
+ "1913": 849621504.0,
+ "1914": 849621504.0,
+ "1915": 849621504.0,
+ "1916": 849621504.0,
+ "1917": 849621504.0,
+ "1918": 849621504.0,
+ "1919": 849621504.0,
+ "1920": 849621504.0,
+ "1921": 849621504.0,
+ "1922": 849621504.0,
+ "1923": 849621504.0,
+ "1924": 849621504.0,
+ "1925": 849621504.0,
+ "1926": 849621504.0,
+ "1927": 849621504.0,
+ "1928": 849621504.0,
+ "1929": 849621504.0,
+ "1930": 849621504.0,
+ "1931": 849621504.0,
+ "1932": 849621504.0,
+ "1933": 849621504.0,
+ "1934": 849621504.0,
+ "1935": 849621504.0,
+ "1936": 849621504.0,
+ "1937": 849621504.0,
+ "1938": 849621504.0,
+ "1939": 849621504.0,
+ "1940": 849621504.0,
+ "1941": 849621504.0,
+ "1942": 849621504.0,
+ "1943": 849621504.0,
+ "1944": 849621504.0,
+ "1945": 849621504.0,
+ "1946": 849621504.0,
+ "1947": 849621504.0,
+ "1948": 849621504.0,
+ "1949": 849621504.0,
+ "1950": 849621504.0,
+ "1951": 849621504.0,
+ "1952": 849621504.0,
+ "1953": 849621504.0,
+ "1954": 849621504.0,
+ "1955": 849621504.0,
+ "1956": 849621504.0,
+ "1957": 849621504.0,
+ "1958": 849621504.0,
+ "1959": 849621504.0,
+ "1960": 849621504.0,
+ "1961": 849621504.0,
+ "1962": 849621504.0,
+ "1963": 849621504.0,
+ "1964": 849621504.0,
+ "1965": 849621504.0,
+ "1966": 849621504.0,
+ "1967": 849621504.0,
+ "1968": 849621504.0,
+ "1969": 849621504.0,
+ "1970": 849621504.0,
+ "1971": 849621504.0,
+ "1972": 849621504.0,
+ "1973": 849621504.0,
+ "1974": 849621504.0,
+ "1975": 849621504.0,
+ "1976": 849621504.0,
+ "1977": 849621504.0,
+ "1978": 849621504.0,
+ "1979": 849621504.0,
+ "1980": 849621504.0,
+ "1981": 849621504.0,
+ "1982": 849621504.0,
+ "1983": 849621504.0,
+ "1984": 849621504.0,
+ "1985": 849621504.0,
+ "1986": 849621504.0,
+ "1987": 849621504.0,
+ "1988": 849621504.0,
+ "1989": 849621504.0,
+ "1990": 849621504.0,
+ "1991": 849621504.0,
+ "1992": 849621504.0,
+ "1993": 849621504.0,
+ "1994": 849621504.0,
+ "1995": 849621504.0,
+ "1996": 849621504.0,
+ "1997": 849621504.0,
+ "1998": 849621504.0,
+ "1999": 849621504.0,
+ "2000": 849621504.0
+ }
+ },
+ "iteration-time": {
+ "start_step": 1,
+ "end_step": 2000,
+ "step_interval": 1,
+ "values": {
+ "1": 14.94115,
+ "2": 1.30868,
+ "3": 1.13391,
+ "4": 1.12792,
+ "5": 1.13103,
+ "6": 1.1383,
+ "7": 1.13573,
+ "8": 1.15789,
+ "9": 1.12704,
+ "10": 1.1241,
+ "11": 1.12786,
+ "12": 1.1288,
+ "13": 1.1399,
+ "14": 1.13165,
+ "15": 1.12333,
+ "16": 1.12398,
+ "17": 1.12493,
+ "18": 1.11586,
+ "19": 1.1123,
+ "20": 1.11192,
+ "21": 1.1266,
+ "22": 1.13629,
+ "23": 1.13171,
+ "24": 1.14969,
+ "25": 1.17022,
+ "26": 1.14634,
+ "27": 1.14242,
+ "28": 1.14353,
+ "29": 1.14554,
+ "30": 1.28826,
+ "31": 1.14265,
+ "32": 1.14023,
+ "33": 1.15286,
+ "34": 1.14975,
+ "35": 1.13988,
+ "36": 1.62757,
+ "37": 2.22703,
+ "38": 1.36074,
+ "39": 1.1325,
+ "40": 1.14106,
+ "41": 1.14114,
+ "42": 1.13305,
+ "43": 1.12375,
+ "44": 1.12631,
+ "45": 1.12358,
+ "46": 1.12334,
+ "47": 1.12398,
+ "48": 1.12749,
+ "49": 1.13897,
+ "50": 1.13563,
+ "51": 1.13628,
+ "52": 1.12935,
+ "53": 1.12779,
+ "54": 1.13147,
+ "55": 1.1279,
+ "56": 1.12777,
+ "57": 1.1269,
+ "58": 1.13989,
+ "59": 1.13378,
+ "60": 1.13552,
+ "61": 1.12879,
+ "62": 1.4796,
+ "63": 1.12843,
+ "64": 1.12488,
+ "65": 1.12888,
+ "66": 1.14028,
+ "67": 1.13532,
+ "68": 1.13278,
+ "69": 1.12779,
+ "70": 1.12468,
+ "71": 1.12483,
+ "72": 1.12423,
+ "73": 1.12335,
+ "74": 1.12699,
+ "75": 1.13379,
+ "76": 1.13001,
+ "77": 1.12994,
+ "78": 1.13166,
+ "79": 1.12415,
+ "80": 1.126,
+ "81": 1.16016,
+ "82": 1.13845,
+ "83": 1.13882,
+ "84": 1.14455,
+ "85": 1.46908,
+ "86": 1.1259,
+ "87": 1.12119,
+ "88": 1.12312,
+ "89": 1.12593,
+ "90": 1.51995,
+ "91": 1.16022,
+ "92": 1.1304,
+ "93": 1.13161,
+ "94": 1.13511,
+ "95": 1.13911,
+ "96": 1.80205,
+ "97": 1.13368,
+ "98": 1.13335,
+ "99": 1.13549,
+ "100": 1.13409,
+ "101": 1.13703,
+ "102": 1.14592,
+ "103": 1.13516,
+ "104": 1.13661,
+ "105": 1.13299,
+ "106": 1.13577,
+ "107": 1.13657,
+ "108": 1.13144,
+ "109": 1.14828,
+ "110": 1.15036,
+ "111": 1.1486,
+ "112": 1.14183,
+ "113": 1.14297,
+ "114": 1.1411,
+ "115": 1.14318,
+ "116": 1.14291,
+ "117": 1.14168,
+ "118": 1.15055,
+ "119": 1.1482,
+ "120": 1.15352,
+ "121": 1.13046,
+ "122": 1.145,
+ "123": 1.14278,
+ "124": 1.1428,
+ "125": 1.14189,
+ "126": 1.13609,
+ "127": 1.14025,
+ "128": 1.14097,
+ "129": 1.13489,
+ "130": 1.13417,
+ "131": 1.13581,
+ "132": 1.13708,
+ "133": 1.17896,
+ "134": 1.13176,
+ "135": 1.12984,
+ "136": 1.1435,
+ "137": 1.15088,
+ "138": 1.14391,
+ "139": 1.14409,
+ "140": 1.14238,
+ "141": 1.14313,
+ "142": 1.1493,
+ "143": 1.13518,
+ "144": 1.13229,
+ "145": 1.13749,
+ "146": 1.15049,
+ "147": 1.16077,
+ "148": 1.14254,
+ "149": 1.14071,
+ "150": 1.14075,
+ "151": 1.13943,
+ "152": 1.15276,
+ "153": 1.15369,
+ "154": 1.14618,
+ "155": 1.14225,
+ "156": 1.14285,
+ "157": 1.14106,
+ "158": 1.14415,
+ "159": 1.14445,
+ "160": 1.14934,
+ "161": 1.14229,
+ "162": 1.14167,
+ "163": 1.14058,
+ "164": 1.14064,
+ "165": 1.14012,
+ "166": 1.15198,
+ "167": 1.15221,
+ "168": 1.1471,
+ "169": 1.14122,
+ "170": 1.14769,
+ "171": 1.14073,
+ "172": 1.14205,
+ "173": 1.14583,
+ "174": 1.14217,
+ "175": 1.14015,
+ "176": 1.14319,
+ "177": 1.14097,
+ "178": 1.14115,
+ "179": 1.14122,
+ "180": 1.15137,
+ "181": 1.14856,
+ "182": 1.15203,
+ "183": 1.14535,
+ "184": 1.13997,
+ "185": 1.15174,
+ "186": 1.18192,
+ "187": 1.14929,
+ "188": 1.14842,
+ "189": 1.14724,
+ "190": 1.14922,
+ "191": 1.14932,
+ "192": 1.14856,
+ "193": 1.1562,
+ "194": 1.153,
+ "195": 1.16371,
+ "196": 1.14525,
+ "197": 1.1411,
+ "198": 1.14592,
+ "199": 1.14301,
+ "200": 1.15088,
+ "201": 1.14229,
+ "202": 1.14171,
+ "203": 1.14083,
+ "204": 1.13968,
+ "205": 1.13977,
+ "206": 1.14177,
+ "207": 1.15548,
+ "208": 1.15609,
+ "209": 1.14509,
+ "210": 1.1487,
+ "211": 1.14163,
+ "212": 1.13971,
+ "213": 1.15326,
+ "214": 1.14129,
+ "215": 1.14055,
+ "216": 1.13893,
+ "217": 1.14191,
+ "218": 1.1418,
+ "219": 1.14249,
+ "220": 1.14162,
+ "221": 1.14077,
+ "222": 1.15513,
+ "223": 1.15668,
+ "224": 1.14515,
+ "225": 1.14589,
+ "226": 1.14548,
+ "227": 1.14318,
+ "228": 1.14204,
+ "229": 1.14391,
+ "230": 1.14565,
+ "231": 1.1439,
+ "232": 1.14309,
+ "233": 1.14396,
+ "234": 1.14146,
+ "235": 1.14229,
+ "236": 1.14106,
+ "237": 1.14362,
+ "238": 1.15203,
+ "239": 1.1942,
+ "240": 1.18025,
+ "241": 1.15197,
+ "242": 1.15276,
+ "243": 1.15399,
+ "244": 1.15628,
+ "245": 1.14958,
+ "246": 1.14931,
+ "247": 1.14093,
+ "248": 1.13869,
+ "249": 1.1385,
+ "250": 1.13897,
+ "251": 1.13787,
+ "252": 1.13939,
+ "253": 1.17282,
+ "254": 1.13361,
+ "255": 1.13502,
+ "256": 1.13895,
+ "257": 1.16245,
+ "258": 1.1352,
+ "259": 1.15685,
+ "260": 1.14637,
+ "261": 1.2867,
+ "262": 1.13699,
+ "263": 1.13959,
+ "264": 1.15414,
+ "265": 1.14324,
+ "266": 1.14515,
+ "267": 1.14328,
+ "268": 1.14359,
+ "269": 1.144,
+ "270": 1.15446,
+ "271": 1.15182,
+ "272": 1.15575,
+ "273": 1.15561,
+ "274": 1.15762,
+ "275": 1.15307,
+ "276": 1.1516,
+ "277": 1.1569,
+ "278": 1.15789,
+ "279": 1.168,
+ "280": 1.16711,
+ "281": 1.16858,
+ "282": 1.16899,
+ "283": 1.15631,
+ "284": 1.15543,
+ "285": 1.15685,
+ "286": 1.15663,
+ "287": 1.15204,
+ "288": 1.15333,
+ "289": 1.15257,
+ "290": 1.14865,
+ "291": 1.15067,
+ "292": 1.15626,
+ "293": 1.15161,
+ "294": 1.15116,
+ "295": 1.15102,
+ "296": 1.15104,
+ "297": 1.17304,
+ "298": 1.17562,
+ "299": 1.17694,
+ "300": 1.15026,
+ "301": 1.15562,
+ "302": 1.15582,
+ "303": 1.15039,
+ "304": 1.14517,
+ "305": 1.14745,
+ "306": 1.15392,
+ "307": 1.15054,
+ "308": 1.14391,
+ "309": 1.1426,
+ "310": 1.1434,
+ "311": 1.14297,
+ "312": 1.14164,
+ "313": 1.15234,
+ "314": 1.14891,
+ "315": 1.14745,
+ "316": 1.15325,
+ "317": 1.15145,
+ "318": 1.51061,
+ "319": 1.13797,
+ "320": 1.13871,
+ "321": 1.20976,
+ "322": 1.19788,
+ "323": 1.14258,
+ "324": 1.14169,
+ "325": 1.14227,
+ "326": 1.1426,
+ "327": 1.14596,
+ "328": 1.14584,
+ "329": 1.14606,
+ "330": 1.13676,
+ "331": 1.14712,
+ "332": 1.14502,
+ "333": 1.14602,
+ "334": 1.14598,
+ "335": 1.15781,
+ "336": 1.15666,
+ "337": 1.1498,
+ "338": 1.15651,
+ "339": 1.15267,
+ "340": 1.14703,
+ "341": 1.14889,
+ "342": 1.14863,
+ "343": 1.14731,
+ "344": 1.1479,
+ "345": 1.20819,
+ "346": 1.15653,
+ "347": 1.15548,
+ "348": 1.15594,
+ "349": 1.15558,
+ "350": 1.15652,
+ "351": 1.15348,
+ "352": 1.15517,
+ "353": 1.15665,
+ "354": 1.15895,
+ "355": 1.15829,
+ "356": 1.16229,
+ "357": 1.17016,
+ "358": 1.16317,
+ "359": 1.18492,
+ "360": 1.20126,
+ "361": 1.19034,
+ "362": 1.18723,
+ "363": 1.16724,
+ "364": 1.14627,
+ "365": 1.14394,
+ "366": 1.14503,
+ "367": 1.14264,
+ "368": 1.14464,
+ "369": 1.14478,
+ "370": 1.14447,
+ "371": 1.15012,
+ "372": 1.14509,
+ "373": 1.14362,
+ "374": 1.14617,
+ "375": 1.14658,
+ "376": 1.13748,
+ "377": 1.15141,
+ "378": 1.14564,
+ "379": 1.14278,
+ "380": 1.14166,
+ "381": 1.14361,
+ "382": 1.14293,
+ "383": 1.14196,
+ "384": 1.14178,
+ "385": 1.14053,
+ "386": 1.14184,
+ "387": 1.14451,
+ "388": 1.14162,
+ "389": 1.1419,
+ "390": 1.14477,
+ "391": 1.15539,
+ "392": 1.16117,
+ "393": 1.16925,
+ "394": 1.16815,
+ "395": 1.1561,
+ "396": 1.15146,
+ "397": 1.15422,
+ "398": 1.14884,
+ "399": 1.14136,
+ "400": 1.14059,
+ "401": 1.14105,
+ "402": 1.14013,
+ "403": 1.15094,
+ "404": 1.13492,
+ "405": 1.1425,
+ "406": 1.14173,
+ "407": 1.14385,
+ "408": 1.14421,
+ "409": 1.14226,
+ "410": 1.1417,
+ "411": 1.1511,
+ "412": 1.15763,
+ "413": 1.15891,
+ "414": 1.15294,
+ "415": 1.15191,
+ "416": 1.15346,
+ "417": 1.15001,
+ "418": 1.15279,
+ "419": 1.14974,
+ "420": 1.14848,
+ "421": 1.14722,
+ "422": 1.15396,
+ "423": 1.1499,
+ "424": 1.15269,
+ "425": 1.15087,
+ "426": 1.14945,
+ "427": 1.15106,
+ "428": 1.15515,
+ "429": 1.14379,
+ "430": 1.16231,
+ "431": 1.18658,
+ "432": 1.17212,
+ "433": 1.16725,
+ "434": 1.17832,
+ "435": 1.16254,
+ "436": 1.16094,
+ "437": 1.15865,
+ "438": 1.16104,
+ "439": 1.1621,
+ "440": 1.13911,
+ "441": 1.13485,
+ "442": 1.13534,
+ "443": 1.13627,
+ "444": 1.13432,
+ "445": 1.13868,
+ "446": 1.13561,
+ "447": 1.13518,
+ "448": 1.1365,
+ "449": 1.13444,
+ "450": 1.13455,
+ "451": 1.14098,
+ "452": 1.15368,
+ "453": 1.1566,
+ "454": 1.15931,
+ "455": 1.18151,
+ "456": 1.16215,
+ "457": 1.16012,
+ "458": 1.15916,
+ "459": 1.15837,
+ "460": 1.16214,
+ "461": 1.1652,
+ "462": 1.16044,
+ "463": 1.16179,
+ "464": 1.163,
+ "465": 1.16332,
+ "466": 1.15968,
+ "467": 1.16196,
+ "468": 1.1592,
+ "469": 1.15988,
+ "470": 1.16081,
+ "471": 1.16128,
+ "472": 1.15868,
+ "473": 1.16004,
+ "474": 1.16125,
+ "475": 1.15956,
+ "476": 1.16733,
+ "477": 1.18857,
+ "478": 1.15838,
+ "479": 1.16068,
+ "480": 1.16004,
+ "481": 1.15956,
+ "482": 1.15757,
+ "483": 1.15802,
+ "484": 1.16061,
+ "485": 1.15848,
+ "486": 1.16058,
+ "487": 1.15819,
+ "488": 1.15991,
+ "489": 1.15831,
+ "490": 1.1589,
+ "491": 1.16144,
+ "492": 1.15934,
+ "493": 1.15973,
+ "494": 1.16104,
+ "495": 1.15933,
+ "496": 1.16173,
+ "497": 1.16203,
+ "498": 1.16059,
+ "499": 1.16461,
+ "500": 1.16533,
+ "501": 1.1723,
+ "502": 1.17075,
+ "503": 1.17256,
+ "504": 1.16176,
+ "505": 1.15972,
+ "506": 1.16185,
+ "507": 1.21311,
+ "508": 1.16326,
+ "509": 1.15384,
+ "510": 1.15071,
+ "511": 1.15307,
+ "512": 1.15748,
+ "513": 1.1518,
+ "514": 1.15181,
+ "515": 1.15338,
+ "516": 1.1524,
+ "517": 1.15481,
+ "518": 1.15358,
+ "519": 1.16302,
+ "520": 1.16218,
+ "521": 1.15461,
+ "522": 1.157,
+ "523": 1.15817,
+ "524": 1.15517,
+ "525": 1.15361,
+ "526": 1.15183,
+ "527": 1.15237,
+ "528": 1.15423,
+ "529": 1.15637,
+ "530": 1.15521,
+ "531": 1.15012,
+ "532": 1.15132,
+ "533": 1.1495,
+ "534": 1.14919,
+ "535": 1.1546,
+ "536": 1.15442,
+ "537": 1.1514,
+ "538": 1.15195,
+ "539": 1.15221,
+ "540": 1.15639,
+ "541": 1.1549,
+ "542": 1.15495,
+ "543": 1.15683,
+ "544": 1.16361,
+ "545": 1.16186,
+ "546": 1.15697,
+ "547": 1.15978,
+ "548": 1.16151,
+ "549": 1.15737,
+ "550": 1.15451,
+ "551": 1.16057,
+ "552": 1.20604,
+ "553": 1.15937,
+ "554": 1.21638,
+ "555": 1.16193,
+ "556": 1.16004,
+ "557": 1.15937,
+ "558": 1.15924,
+ "559": 1.15864,
+ "560": 1.16064,
+ "561": 1.15935,
+ "562": 1.43389,
+ "563": 1.16041,
+ "564": 1.16122,
+ "565": 1.49173,
+ "566": 1.15954,
+ "567": 1.17345,
+ "568": 1.16261,
+ "569": 1.15966,
+ "570": 1.1607,
+ "571": 1.15553,
+ "572": 1.1568,
+ "573": 1.15385,
+ "574": 1.15701,
+ "575": 1.15849,
+ "576": 1.15634,
+ "577": 1.15908,
+ "578": 1.15576,
+ "579": 1.15627,
+ "580": 1.14973,
+ "581": 1.16027,
+ "582": 1.16176,
+ "583": 1.15493,
+ "584": 1.15722,
+ "585": 1.15744,
+ "586": 1.15502,
+ "587": 1.1559,
+ "588": 1.15496,
+ "589": 1.16378,
+ "590": 1.16595,
+ "591": 1.16611,
+ "592": 1.16989,
+ "593": 1.16842,
+ "594": 1.17261,
+ "595": 1.15925,
+ "596": 1.16083,
+ "597": 1.16113,
+ "598": 1.16297,
+ "599": 1.16456,
+ "600": 1.15983,
+ "601": 1.16187,
+ "602": 1.15943,
+ "603": 1.15985,
+ "604": 1.1592,
+ "605": 1.15871,
+ "606": 1.16032,
+ "607": 1.15919,
+ "608": 1.17988,
+ "609": 1.16067,
+ "610": 1.18157,
+ "611": 1.15299,
+ "612": 1.15282,
+ "613": 1.15274,
+ "614": 1.15344,
+ "615": 1.15192,
+ "616": 1.15757,
+ "617": 1.15404,
+ "618": 1.16198,
+ "619": 1.12381,
+ "620": 1.11492,
+ "621": 1.14943,
+ "622": 1.16512,
+ "623": 1.16958,
+ "624": 1.16409,
+ "625": 1.15844,
+ "626": 1.14917,
+ "627": 1.15285,
+ "628": 1.15477,
+ "629": 1.15363,
+ "630": 1.15213,
+ "631": 1.14647,
+ "632": 1.14867,
+ "633": 1.15423,
+ "634": 1.15566,
+ "635": 1.15345,
+ "636": 1.15319,
+ "637": 1.1511,
+ "638": 1.15409,
+ "639": 1.15188,
+ "640": 1.15258,
+ "641": 1.15414,
+ "642": 1.15983,
+ "643": 1.15819,
+ "644": 1.15887,
+ "645": 1.15631,
+ "646": 1.15765,
+ "647": 1.16277,
+ "648": 1.16768,
+ "649": 1.17095,
+ "650": 1.16972,
+ "651": 1.16894,
+ "652": 1.16584,
+ "653": 1.1612,
+ "654": 1.17303,
+ "655": 1.16406,
+ "656": 1.1617,
+ "657": 1.16573,
+ "658": 1.16082,
+ "659": 1.16677,
+ "660": 1.16969,
+ "661": 1.16374,
+ "662": 1.16155,
+ "663": 1.16674,
+ "664": 1.16865,
+ "665": 1.16719,
+ "666": 1.16772,
+ "667": 1.16872,
+ "668": 1.16616,
+ "669": 1.16505,
+ "670": 1.16449,
+ "671": 1.16777,
+ "672": 1.16457,
+ "673": 1.16059,
+ "674": 1.16013,
+ "675": 1.1589,
+ "676": 1.1645,
+ "677": 1.16737,
+ "678": 1.16262,
+ "679": 1.44417,
+ "680": 1.16641,
+ "681": 1.16441,
+ "682": 1.16834,
+ "683": 1.17163,
+ "684": 1.16041,
+ "685": 1.16815,
+ "686": 1.16615,
+ "687": 1.1689,
+ "688": 1.16377,
+ "689": 1.16277,
+ "690": 1.15926,
+ "691": 1.15823,
+ "692": 1.15747,
+ "693": 1.15897,
+ "694": 1.15722,
+ "695": 1.15679,
+ "696": 1.15619,
+ "697": 1.15686,
+ "698": 1.15548,
+ "699": 1.15619,
+ "700": 1.15662,
+ "701": 1.15701,
+ "702": 1.15611,
+ "703": 1.1578,
+ "704": 1.15921,
+ "705": 1.15626,
+ "706": 1.15696,
+ "707": 1.15676,
+ "708": 1.15718,
+ "709": 1.15643,
+ "710": 1.16154,
+ "711": 1.15995,
+ "712": 1.159,
+ "713": 1.16786,
+ "714": 1.15799,
+ "715": 1.15749,
+ "716": 1.52131,
+ "717": 1.15676,
+ "718": 1.16066,
+ "719": 1.15878,
+ "720": 1.16243,
+ "721": 1.15801,
+ "722": 1.16032,
+ "723": 1.15929,
+ "724": 1.16338,
+ "725": 1.15949,
+ "726": 1.16444,
+ "727": 1.31697,
+ "728": 1.15571,
+ "729": 1.15513,
+ "730": 1.15845,
+ "731": 1.16172,
+ "732": 1.15814,
+ "733": 1.1597,
+ "734": 1.15388,
+ "735": 1.15282,
+ "736": 1.15589,
+ "737": 1.15547,
+ "738": 1.1547,
+ "739": 1.15614,
+ "740": 1.15546,
+ "741": 1.15558,
+ "742": 1.15607,
+ "743": 1.15425,
+ "744": 1.15442,
+ "745": 1.16502,
+ "746": 1.15566,
+ "747": 1.15865,
+ "748": 1.15828,
+ "749": 1.16418,
+ "750": 1.15709,
+ "751": 1.15988,
+ "752": 1.15915,
+ "753": 1.15069,
+ "754": 1.15176,
+ "755": 1.15161,
+ "756": 1.1502,
+ "757": 1.14643,
+ "758": 1.7155,
+ "759": 1.15471,
+ "760": 1.15638,
+ "761": 1.15684,
+ "762": 1.16005,
+ "763": 1.1585,
+ "764": 1.16197,
+ "765": 1.22988,
+ "766": 1.16563,
+ "767": 1.16594,
+ "768": 1.16751,
+ "769": 1.16167,
+ "770": 1.16736,
+ "771": 1.16232,
+ "772": 1.16021,
+ "773": 1.16138,
+ "774": 1.16446,
+ "775": 1.15216,
+ "776": 1.15086,
+ "777": 1.15506,
+ "778": 1.15465,
+ "779": 1.15872,
+ "780": 1.15533,
+ "781": 1.15836,
+ "782": 1.15778,
+ "783": 1.21735,
+ "784": 1.15535,
+ "785": 1.14905,
+ "786": 1.14868,
+ "787": 1.14899,
+ "788": 1.1521,
+ "789": 1.1498,
+ "790": 1.15389,
+ "791": 1.15198,
+ "792": 1.14834,
+ "793": 1.14935,
+ "794": 1.14986,
+ "795": 1.15066,
+ "796": 1.15229,
+ "797": 1.15036,
+ "798": 1.15026,
+ "799": 1.15231,
+ "800": 1.15717,
+ "801": 1.15355,
+ "802": 1.15502,
+ "803": 1.15201,
+ "804": 1.15023,
+ "805": 1.15209,
+ "806": 1.15072,
+ "807": 1.48449,
+ "808": 1.15218,
+ "809": 1.1522,
+ "810": 1.15111,
+ "811": 1.15134,
+ "812": 1.15187,
+ "813": 1.15379,
+ "814": 1.15585,
+ "815": 1.16392,
+ "816": 1.15452,
+ "817": 1.15487,
+ "818": 1.15245,
+ "819": 1.14836,
+ "820": 1.14547,
+ "821": 1.74382,
+ "822": 1.14655,
+ "823": 1.13629,
+ "824": 1.15244,
+ "825": 1.14064,
+ "826": 1.14002,
+ "827": 1.14234,
+ "828": 1.1401,
+ "829": 1.13945,
+ "830": 1.14243,
+ "831": 1.14339,
+ "832": 1.13963,
+ "833": 1.14165,
+ "834": 1.13931,
+ "835": 1.13828,
+ "836": 1.13924,
+ "837": 1.13918,
+ "838": 1.14038,
+ "839": 1.14023,
+ "840": 1.13827,
+ "841": 1.14334,
+ "842": 1.26736,
+ "843": 1.15235,
+ "844": 1.16327,
+ "845": 1.15615,
+ "846": 1.15656,
+ "847": 1.14563,
+ "848": 1.14836,
+ "849": 1.14901,
+ "850": 1.14852,
+ "851": 1.15019,
+ "852": 1.14893,
+ "853": 1.14907,
+ "854": 1.14895,
+ "855": 1.14997,
+ "856": 1.14951,
+ "857": 1.15014,
+ "858": 1.14881,
+ "859": 1.15072,
+ "860": 1.16126,
+ "861": 1.15807,
+ "862": 1.15716,
+ "863": 1.15555,
+ "864": 1.15038,
+ "865": 1.15177,
+ "866": 1.15177,
+ "867": 1.14884,
+ "868": 1.14782,
+ "869": 1.15086,
+ "870": 1.14982,
+ "871": 1.14833,
+ "872": 1.14875,
+ "873": 1.15147,
+ "874": 1.15225,
+ "875": 1.29099,
+ "876": 2.39847,
+ "877": 2.16612,
+ "878": 1.53276,
+ "879": 1.14604,
+ "880": 1.1515,
+ "881": 1.16208,
+ "882": 1.15925,
+ "883": 1.14916,
+ "884": 1.14927,
+ "885": 1.1758,
+ "886": 1.17545,
+ "887": 1.17369,
+ "888": 1.17655,
+ "889": 1.16376,
+ "890": 1.14874,
+ "891": 1.148,
+ "892": 1.14787,
+ "893": 1.15123,
+ "894": 1.15168,
+ "895": 1.15419,
+ "896": 1.15535,
+ "897": 1.15242,
+ "898": 1.15508,
+ "899": 1.15225,
+ "900": 1.15072,
+ "901": 1.1534,
+ "902": 1.15136,
+ "903": 1.15481,
+ "904": 1.15989,
+ "905": 1.16184,
+ "906": 1.14716,
+ "907": 1.15192,
+ "908": 1.15696,
+ "909": 1.15328,
+ "910": 1.14059,
+ "911": 1.1604,
+ "912": 1.14941,
+ "913": 1.14972,
+ "914": 1.14954,
+ "915": 1.15073,
+ "916": 1.14475,
+ "917": 1.15414,
+ "918": 1.1385,
+ "919": 1.14185,
+ "920": 1.14089,
+ "921": 1.13784,
+ "922": 1.13875,
+ "923": 1.13882,
+ "924": 1.14141,
+ "925": 1.13908,
+ "926": 1.13874,
+ "927": 1.13823,
+ "928": 1.13737,
+ "929": 1.13836,
+ "930": 1.13809,
+ "931": 1.14893,
+ "932": 1.13972,
+ "933": 1.1369,
+ "934": 1.1362,
+ "935": 1.13765,
+ "936": 1.14369,
+ "937": 1.1504,
+ "938": 1.14208,
+ "939": 1.14841,
+ "940": 1.14975,
+ "941": 1.14225,
+ "942": 1.14185,
+ "943": 1.13864,
+ "944": 1.13915,
+ "945": 1.14062,
+ "946": 1.15111,
+ "947": 1.14071,
+ "948": 1.13898,
+ "949": 1.1399,
+ "950": 1.15937,
+ "951": 1.16785,
+ "952": 1.16807,
+ "953": 1.1506,
+ "954": 1.15006,
+ "955": 1.15045,
+ "956": 1.17067,
+ "957": 1.14856,
+ "958": 1.14992,
+ "959": 1.15251,
+ "960": 1.15045,
+ "961": 1.15121,
+ "962": 1.14957,
+ "963": 1.15095,
+ "964": 1.15,
+ "965": 1.15089,
+ "966": 1.15156,
+ "967": 1.15423,
+ "968": 1.16332,
+ "969": 1.15359,
+ "970": 1.15613,
+ "971": 1.15232,
+ "972": 1.15652,
+ "973": 1.15399,
+ "974": 1.15065,
+ "975": 1.1485,
+ "976": 1.15243,
+ "977": 1.15368,
+ "978": 1.14828,
+ "979": 1.14969,
+ "980": 1.15374,
+ "981": 1.1505,
+ "982": 1.15031,
+ "983": 1.15033,
+ "984": 1.14921,
+ "985": 1.15504,
+ "986": 1.15572,
+ "987": 1.153,
+ "988": 1.15573,
+ "989": 1.14747,
+ "990": 1.14636,
+ "991": 1.14517,
+ "992": 1.1463,
+ "993": 1.14805,
+ "994": 1.14644,
+ "995": 1.14583,
+ "996": 1.14485,
+ "997": 1.14418,
+ "998": 1.14622,
+ "999": 1.14662,
+ "1000": 1.14312,
+ "1001": 1.15227,
+ "1002": 1.14681,
+ "1003": 1.14794,
+ "1004": 1.14889,
+ "1005": 1.15067,
+ "1006": 1.14757,
+ "1007": 1.14767,
+ "1008": 1.15061,
+ "1009": 1.15075,
+ "1010": 1.14894,
+ "1011": 1.14975,
+ "1012": 1.14667,
+ "1013": 1.14688,
+ "1014": 1.14788,
+ "1015": 1.167,
+ "1016": 1.44606,
+ "1017": 1.14923,
+ "1018": 1.15268,
+ "1019": 1.14981,
+ "1020": 1.15011,
+ "1021": 1.47391,
+ "1022": 1.15277,
+ "1023": 1.14774,
+ "1024": 1.146,
+ "1025": 1.15253,
+ "1026": 1.14633,
+ "1027": 1.14525,
+ "1028": 1.14728,
+ "1029": 1.14654,
+ "1030": 1.14663,
+ "1031": 1.14708,
+ "1032": 1.14715,
+ "1033": 1.1454,
+ "1034": 1.14763,
+ "1035": 1.14591,
+ "1036": 1.14493,
+ "1037": 1.14584,
+ "1038": 1.14665,
+ "1039": 1.14812,
+ "1040": 1.14495,
+ "1041": 1.15044,
+ "1042": 1.14701,
+ "1043": 1.14657,
+ "1044": 1.14631,
+ "1045": 1.14822,
+ "1046": 1.14789,
+ "1047": 1.14525,
+ "1048": 1.14815,
+ "1049": 1.14939,
+ "1050": 1.14592,
+ "1051": 1.14667,
+ "1052": 1.15232,
+ "1053": 1.14863,
+ "1054": 1.14908,
+ "1055": 1.14931,
+ "1056": 1.14644,
+ "1057": 1.149,
+ "1058": 1.14751,
+ "1059": 1.14668,
+ "1060": 1.14758,
+ "1061": 1.14789,
+ "1062": 1.43562,
+ "1063": 1.14875,
+ "1064": 1.14846,
+ "1065": 1.14888,
+ "1066": 1.15486,
+ "1067": 1.15212,
+ "1068": 1.14934,
+ "1069": 1.14526,
+ "1070": 1.14506,
+ "1071": 1.14599,
+ "1072": 1.14774,
+ "1073": 1.14651,
+ "1074": 1.14609,
+ "1075": 1.14817,
+ "1076": 1.14662,
+ "1077": 1.15159,
+ "1078": 1.14735,
+ "1079": 1.14525,
+ "1080": 1.1516,
+ "1081": 1.14601,
+ "1082": 1.13989,
+ "1083": 1.13569,
+ "1084": 1.1371,
+ "1085": 1.1366,
+ "1086": 1.13713,
+ "1087": 1.13756,
+ "1088": 1.13768,
+ "1089": 1.13917,
+ "1090": 1.13759,
+ "1091": 1.13884,
+ "1092": 1.13707,
+ "1093": 1.13679,
+ "1094": 1.13513,
+ "1095": 1.1351,
+ "1096": 1.13494,
+ "1097": 1.13589,
+ "1098": 1.14132,
+ "1099": 1.13697,
+ "1100": 1.14195,
+ "1101": 1.14189,
+ "1102": 1.13736,
+ "1103": 1.13781,
+ "1104": 1.14284,
+ "1105": 1.13518,
+ "1106": 1.13585,
+ "1107": 1.13621,
+ "1108": 1.13665,
+ "1109": 1.13792,
+ "1110": 1.13764,
+ "1111": 1.13778,
+ "1112": 1.13619,
+ "1113": 1.13651,
+ "1114": 1.13628,
+ "1115": 1.13802,
+ "1116": 1.13792,
+ "1117": 1.13642,
+ "1118": 1.13784,
+ "1119": 1.14898,
+ "1120": 1.15049,
+ "1121": 1.15028,
+ "1122": 1.14509,
+ "1123": 1.1445,
+ "1124": 1.14756,
+ "1125": 1.15117,
+ "1126": 1.14917,
+ "1127": 1.1475,
+ "1128": 1.1481,
+ "1129": 1.14683,
+ "1130": 1.14088,
+ "1131": 1.13493,
+ "1132": 1.13613,
+ "1133": 1.13537,
+ "1134": 1.13473,
+ "1135": 1.13657,
+ "1136": 1.13516,
+ "1137": 1.13606,
+ "1138": 1.13473,
+ "1139": 1.13442,
+ "1140": 1.13398,
+ "1141": 1.13591,
+ "1142": 1.13975,
+ "1143": 1.13478,
+ "1144": 1.13376,
+ "1145": 1.13428,
+ "1146": 1.1348,
+ "1147": 1.13462,
+ "1148": 1.1351,
+ "1149": 1.13494,
+ "1150": 1.13506,
+ "1151": 1.13487,
+ "1152": 1.14039,
+ "1153": 1.13991,
+ "1154": 1.13825,
+ "1155": 1.1373,
+ "1156": 1.13451,
+ "1157": 1.13683,
+ "1158": 1.13335,
+ "1159": 1.13548,
+ "1160": 1.1339,
+ "1161": 1.13613,
+ "1162": 1.13429,
+ "1163": 1.13448,
+ "1164": 1.13542,
+ "1165": 1.13453,
+ "1166": 1.13398,
+ "1167": 1.13549,
+ "1168": 1.1342,
+ "1169": 1.13502,
+ "1170": 1.13535,
+ "1171": 1.13581,
+ "1172": 1.13532,
+ "1173": 1.13552,
+ "1174": 1.13371,
+ "1175": 1.13456,
+ "1176": 1.13401,
+ "1177": 1.1335,
+ "1178": 1.13628,
+ "1179": 1.13907,
+ "1180": 1.13757,
+ "1181": 1.1538,
+ "1182": 1.15712,
+ "1183": 1.16123,
+ "1184": 1.15318,
+ "1185": 1.14801,
+ "1186": 1.14711,
+ "1187": 1.1471,
+ "1188": 1.15109,
+ "1189": 1.14707,
+ "1190": 1.14787,
+ "1191": 1.1451,
+ "1192": 1.14677,
+ "1193": 1.14621,
+ "1194": 1.14554,
+ "1195": 1.14738,
+ "1196": 1.14756,
+ "1197": 1.14799,
+ "1198": 1.1487,
+ "1199": 1.14616,
+ "1200": 1.14688,
+ "1201": 1.14531,
+ "1202": 1.14639,
+ "1203": 1.14696,
+ "1204": 1.1469,
+ "1205": 1.1472,
+ "1206": 1.14687,
+ "1207": 1.1494,
+ "1208": 1.14873,
+ "1209": 1.15175,
+ "1210": 1.14868,
+ "1211": 1.14793,
+ "1212": 1.14766,
+ "1213": 1.14823,
+ "1214": 1.15557,
+ "1215": 1.15986,
+ "1216": 1.14175,
+ "1217": 1.1392,
+ "1218": 1.13591,
+ "1219": 1.13796,
+ "1220": 1.14086,
+ "1221": 1.14081,
+ "1222": 1.13816,
+ "1223": 1.13977,
+ "1224": 1.14436,
+ "1225": 1.13986,
+ "1226": 1.13821,
+ "1227": 1.13854,
+ "1228": 1.13738,
+ "1229": 1.1384,
+ "1230": 1.13897,
+ "1231": 1.13732,
+ "1232": 1.13852,
+ "1233": 1.14144,
+ "1234": 1.13711,
+ "1235": 1.14105,
+ "1236": 1.13578,
+ "1237": 1.13838,
+ "1238": 1.13809,
+ "1239": 1.13782,
+ "1240": 1.13859,
+ "1241": 1.1381,
+ "1242": 1.13717,
+ "1243": 1.14814,
+ "1244": 1.16451,
+ "1245": 1.17765,
+ "1246": 1.17167,
+ "1247": 1.15708,
+ "1248": 1.15406,
+ "1249": 1.17391,
+ "1250": 1.14803,
+ "1251": 1.14601,
+ "1252": 1.14796,
+ "1253": 1.14706,
+ "1254": 1.14679,
+ "1255": 1.14306,
+ "1256": 1.14387,
+ "1257": 1.14608,
+ "1258": 1.14617,
+ "1259": 1.14999,
+ "1260": 1.1468,
+ "1261": 1.14332,
+ "1262": 1.15005,
+ "1263": 1.1449,
+ "1264": 1.14544,
+ "1265": 1.14292,
+ "1266": 1.14481,
+ "1267": 1.154,
+ "1268": 1.15455,
+ "1269": 1.15329,
+ "1270": 1.15008,
+ "1271": 1.15345,
+ "1272": 1.14616,
+ "1273": 1.15423,
+ "1274": 1.15349,
+ "1275": 1.14785,
+ "1276": 1.14536,
+ "1277": 1.14467,
+ "1278": 1.1456,
+ "1279": 1.14593,
+ "1280": 1.1462,
+ "1281": 1.14599,
+ "1282": 1.14837,
+ "1283": 1.14585,
+ "1284": 1.14656,
+ "1285": 1.14618,
+ "1286": 1.14615,
+ "1287": 1.14657,
+ "1288": 1.44686,
+ "1289": 1.14572,
+ "1290": 1.14398,
+ "1291": 1.1431,
+ "1292": 1.14524,
+ "1293": 1.14421,
+ "1294": 1.14593,
+ "1295": 1.16051,
+ "1296": 1.16214,
+ "1297": 1.15606,
+ "1298": 1.14439,
+ "1299": 1.14445,
+ "1300": 1.1445,
+ "1301": 1.1455,
+ "1302": 1.14117,
+ "1303": 1.14365,
+ "1304": 1.14474,
+ "1305": 1.14456,
+ "1306": 1.14522,
+ "1307": 1.144,
+ "1308": 1.14453,
+ "1309": 1.14471,
+ "1310": 1.1456,
+ "1311": 1.15495,
+ "1312": 1.15256,
+ "1313": 1.14805,
+ "1314": 1.14996,
+ "1315": 1.14425,
+ "1316": 1.14401,
+ "1317": 1.14262,
+ "1318": 1.14556,
+ "1319": 1.14661,
+ "1320": 1.14567,
+ "1321": 1.14648,
+ "1322": 1.14709,
+ "1323": 1.14522,
+ "1324": 1.14764,
+ "1325": 1.14331,
+ "1326": 1.14538,
+ "1327": 1.1453,
+ "1328": 1.14734,
+ "1329": 1.18619,
+ "1330": 1.48212,
+ "1331": 1.14651,
+ "1332": 1.15204,
+ "1333": 1.14629,
+ "1334": 1.14624,
+ "1335": 1.14927,
+ "1336": 1.14601,
+ "1337": 1.15642,
+ "1338": 1.14811,
+ "1339": 1.14508,
+ "1340": 1.15069,
+ "1341": 1.14629,
+ "1342": 1.14635,
+ "1343": 1.14657,
+ "1344": 1.14655,
+ "1345": 1.14564,
+ "1346": 1.14633,
+ "1347": 1.14523,
+ "1348": 1.14691,
+ "1349": 1.14575,
+ "1350": 1.14592,
+ "1351": 1.14631,
+ "1352": 1.14436,
+ "1353": 1.14573,
+ "1354": 1.14471,
+ "1355": 1.14554,
+ "1356": 1.14492,
+ "1357": 1.14301,
+ "1358": 1.141,
+ "1359": 1.14219,
+ "1360": 1.14228,
+ "1361": 1.14109,
+ "1362": 1.1413,
+ "1363": 1.14096,
+ "1364": 1.15355,
+ "1365": 1.14229,
+ "1366": 1.14615,
+ "1367": 1.14174,
+ "1368": 1.13953,
+ "1369": 1.14014,
+ "1370": 1.14132,
+ "1371": 1.14139,
+ "1372": 1.13849,
+ "1373": 1.14304,
+ "1374": 1.14028,
+ "1375": 1.13912,
+ "1376": 1.14082,
+ "1377": 1.1416,
+ "1378": 1.13936,
+ "1379": 1.13866,
+ "1380": 1.13826,
+ "1381": 1.14443,
+ "1382": 1.14029,
+ "1383": 1.13913,
+ "1384": 1.14177,
+ "1385": 1.14492,
+ "1386": 1.1415,
+ "1387": 1.1398,
+ "1388": 1.14017,
+ "1389": 1.14077,
+ "1390": 1.14782,
+ "1391": 1.15011,
+ "1392": 1.15174,
+ "1393": 1.14605,
+ "1394": 1.14761,
+ "1395": 1.14735,
+ "1396": 1.14827,
+ "1397": 1.14566,
+ "1398": 1.14659,
+ "1399": 1.14187,
+ "1400": 1.14737,
+ "1401": 1.14674,
+ "1402": 1.14468,
+ "1403": 1.14534,
+ "1404": 1.14726,
+ "1405": 1.14773,
+ "1406": 1.14711,
+ "1407": 1.14543,
+ "1408": 1.14568,
+ "1409": 1.14559,
+ "1410": 1.14443,
+ "1411": 1.14591,
+ "1412": 1.14444,
+ "1413": 1.14904,
+ "1414": 1.14806,
+ "1415": 1.14757,
+ "1416": 1.14307,
+ "1417": 1.14119,
+ "1418": 1.14392,
+ "1419": 1.14104,
+ "1420": 1.14278,
+ "1421": 1.13949,
+ "1422": 1.14028,
+ "1423": 1.14112,
+ "1424": 1.14151,
+ "1425": 1.14321,
+ "1426": 1.14894,
+ "1427": 1.14281,
+ "1428": 1.14881,
+ "1429": 1.14225,
+ "1430": 1.13905,
+ "1431": 1.14148,
+ "1432": 1.14895,
+ "1433": 1.15186,
+ "1434": 1.14773,
+ "1435": 1.14968,
+ "1436": 1.14689,
+ "1437": 1.1487,
+ "1438": 1.14731,
+ "1439": 1.14746,
+ "1440": 1.14835,
+ "1441": 1.15151,
+ "1442": 1.15182,
+ "1443": 1.15073,
+ "1444": 1.14751,
+ "1445": 1.15081,
+ "1446": 1.15106,
+ "1447": 1.14876,
+ "1448": 1.15178,
+ "1449": 1.15117,
+ "1450": 1.1479,
+ "1451": 1.14851,
+ "1452": 1.14502,
+ "1453": 1.1454,
+ "1454": 1.14722,
+ "1455": 1.14628,
+ "1456": 1.14413,
+ "1457": 1.14761,
+ "1458": 1.14681,
+ "1459": 1.14632,
+ "1460": 1.14804,
+ "1461": 1.14676,
+ "1462": 1.14566,
+ "1463": 1.14599,
+ "1464": 1.14679,
+ "1465": 1.14572,
+ "1466": 1.14995,
+ "1467": 1.14848,
+ "1468": 1.14679,
+ "1469": 1.15027,
+ "1470": 1.14636,
+ "1471": 1.14406,
+ "1472": 1.14039,
+ "1473": 1.13768,
+ "1474": 1.13897,
+ "1475": 1.14331,
+ "1476": 1.1403,
+ "1477": 1.14139,
+ "1478": 1.14985,
+ "1479": 1.14611,
+ "1480": 1.47655,
+ "1481": 1.45511,
+ "1482": 1.14381,
+ "1483": 1.13941,
+ "1484": 1.13782,
+ "1485": 1.13771,
+ "1486": 1.13796,
+ "1487": 1.13795,
+ "1488": 1.13829,
+ "1489": 1.13758,
+ "1490": 1.13822,
+ "1491": 1.13667,
+ "1492": 1.13847,
+ "1493": 1.13787,
+ "1494": 1.14072,
+ "1495": 1.14614,
+ "1496": 1.14436,
+ "1497": 1.14422,
+ "1498": 1.1393,
+ "1499": 1.13987,
+ "1500": 1.13991,
+ "1501": 1.14215,
+ "1502": 1.13842,
+ "1503": 1.13883,
+ "1504": 1.1496,
+ "1505": 1.14028,
+ "1506": 1.13931,
+ "1507": 1.13949,
+ "1508": 1.14063,
+ "1509": 1.13913,
+ "1510": 1.1402,
+ "1511": 1.13931,
+ "1512": 1.13839,
+ "1513": 1.13771,
+ "1514": 1.13848,
+ "1515": 1.13796,
+ "1516": 1.13782,
+ "1517": 1.13889,
+ "1518": 1.13716,
+ "1519": 1.13908,
+ "1520": 1.13972,
+ "1521": 1.13966,
+ "1522": 1.13875,
+ "1523": 1.15781,
+ "1524": 1.15885,
+ "1525": 1.15802,
+ "1526": 1.14191,
+ "1527": 1.14054,
+ "1528": 1.1385,
+ "1529": 1.13922,
+ "1530": 1.12994,
+ "1531": 1.12552,
+ "1532": 1.27166,
+ "1533": 1.12707,
+ "1534": 1.12638,
+ "1535": 1.12608,
+ "1536": 1.12654,
+ "1537": 1.12511,
+ "1538": 1.16008,
+ "1539": 1.13169,
+ "1540": 1.13294,
+ "1541": 1.13386,
+ "1542": 1.13461,
+ "1543": 1.13337,
+ "1544": 1.1331,
+ "1545": 1.13294,
+ "1546": 1.13283,
+ "1547": 1.13316,
+ "1548": 1.13651,
+ "1549": 1.13626,
+ "1550": 1.13638,
+ "1551": 1.13187,
+ "1552": 1.20522,
+ "1553": 1.15894,
+ "1554": 1.14738,
+ "1555": 1.14563,
+ "1556": 1.14409,
+ "1557": 1.15018,
+ "1558": 1.14323,
+ "1559": 1.14591,
+ "1560": 1.14645,
+ "1561": 1.14673,
+ "1562": 1.14543,
+ "1563": 1.14518,
+ "1564": 1.14589,
+ "1565": 1.14486,
+ "1566": 1.14436,
+ "1567": 1.14357,
+ "1568": 1.1454,
+ "1569": 1.14493,
+ "1570": 1.14347,
+ "1571": 1.14477,
+ "1572": 1.14203,
+ "1573": 1.14441,
+ "1574": 1.14468,
+ "1575": 1.14607,
+ "1576": 1.14532,
+ "1577": 1.14389,
+ "1578": 1.1433,
+ "1579": 1.14321,
+ "1580": 1.14391,
+ "1581": 1.1421,
+ "1582": 1.14368,
+ "1583": 1.1444,
+ "1584": 1.14356,
+ "1585": 1.14875,
+ "1586": 1.14497,
+ "1587": 1.14521,
+ "1588": 1.14708,
+ "1589": 1.14631,
+ "1590": 1.14662,
+ "1591": 1.14949,
+ "1592": 1.15354,
+ "1593": 1.14014,
+ "1594": 1.1408,
+ "1595": 1.14166,
+ "1596": 1.14151,
+ "1597": 1.14228,
+ "1598": 1.14126,
+ "1599": 1.14028,
+ "1600": 1.14528,
+ "1601": 1.14125,
+ "1602": 1.14085,
+ "1603": 1.13862,
+ "1604": 1.13487,
+ "1605": 1.13314,
+ "1606": 1.13467,
+ "1607": 1.13153,
+ "1608": 1.12971,
+ "1609": 1.13044,
+ "1610": 1.14013,
+ "1611": 1.13008,
+ "1612": 1.13161,
+ "1613": 1.13128,
+ "1614": 1.13059,
+ "1615": 1.13169,
+ "1616": 1.13043,
+ "1617": 1.13141,
+ "1618": 1.12976,
+ "1619": 1.13071,
+ "1620": 1.12907,
+ "1621": 1.13138,
+ "1622": 1.12994,
+ "1623": 1.12985,
+ "1624": 1.12999,
+ "1625": 1.13035,
+ "1626": 1.13761,
+ "1627": 1.13703,
+ "1628": 1.15487,
+ "1629": 1.13257,
+ "1630": 1.13549,
+ "1631": 1.13358,
+ "1632": 1.13488,
+ "1633": 1.13601,
+ "1634": 1.13282,
+ "1635": 1.13439,
+ "1636": 1.13078,
+ "1637": 1.13147,
+ "1638": 1.13065,
+ "1639": 1.13181,
+ "1640": 1.13227,
+ "1641": 1.13282,
+ "1642": 1.13305,
+ "1643": 1.19491,
+ "1644": 1.15821,
+ "1645": 1.15349,
+ "1646": 1.1437,
+ "1647": 1.1416,
+ "1648": 1.14282,
+ "1649": 1.1408,
+ "1650": 1.13388,
+ "1651": 1.13396,
+ "1652": 1.15414,
+ "1653": 1.13734,
+ "1654": 1.13143,
+ "1655": 1.13124,
+ "1656": 1.13417,
+ "1657": 1.13376,
+ "1658": 1.12932,
+ "1659": 1.13161,
+ "1660": 1.13178,
+ "1661": 1.1315,
+ "1662": 1.13209,
+ "1663": 1.13118,
+ "1664": 1.13332,
+ "1665": 1.12981,
+ "1666": 1.13001,
+ "1667": 1.12943,
+ "1668": 1.12938,
+ "1669": 1.12973,
+ "1670": 1.13031,
+ "1671": 1.14164,
+ "1672": 1.14108,
+ "1673": 1.14165,
+ "1674": 1.14189,
+ "1675": 1.14174,
+ "1676": 1.14802,
+ "1677": 1.14434,
+ "1678": 1.14543,
+ "1679": 1.14285,
+ "1680": 1.14529,
+ "1681": 1.14548,
+ "1682": 1.14333,
+ "1683": 1.14553,
+ "1684": 1.14327,
+ "1685": 1.1476,
+ "1686": 1.1406,
+ "1687": 1.13769,
+ "1688": 1.13364,
+ "1689": 1.13418,
+ "1690": 1.13026,
+ "1691": 1.13222,
+ "1692": 1.13195,
+ "1693": 1.13247,
+ "1694": 1.13264,
+ "1695": 1.13167,
+ "1696": 1.13234,
+ "1697": 1.13335,
+ "1698": 1.13463,
+ "1699": 1.1337,
+ "1700": 1.13362,
+ "1701": 1.13339,
+ "1702": 1.13335,
+ "1703": 1.13412,
+ "1704": 1.1332,
+ "1705": 1.13109,
+ "1706": 1.13306,
+ "1707": 1.42699,
+ "1708": 1.14258,
+ "1709": 1.13227,
+ "1710": 1.13333,
+ "1711": 1.13316,
+ "1712": 1.13147,
+ "1713": 1.1325,
+ "1714": 1.13279,
+ "1715": 1.13509,
+ "1716": 1.132,
+ "1717": 1.13183,
+ "1718": 1.13123,
+ "1719": 1.13209,
+ "1720": 1.13195,
+ "1721": 1.12891,
+ "1722": 1.12633,
+ "1723": 1.12872,
+ "1724": 1.1269,
+ "1725": 1.12641,
+ "1726": 1.12585,
+ "1727": 1.12446,
+ "1728": 1.12583,
+ "1729": 1.1336,
+ "1730": 1.1322,
+ "1731": 1.13153,
+ "1732": 1.132,
+ "1733": 1.13239,
+ "1734": 1.13216,
+ "1735": 1.13252,
+ "1736": 1.13132,
+ "1737": 1.13165,
+ "1738": 1.13359,
+ "1739": 1.126,
+ "1740": 1.124,
+ "1741": 1.12533,
+ "1742": 1.12379,
+ "1743": 1.12474,
+ "1744": 1.12432,
+ "1745": 1.13505,
+ "1746": 1.13795,
+ "1747": 1.13914,
+ "1748": 1.17805,
+ "1749": 1.13962,
+ "1750": 1.13602,
+ "1751": 1.13778,
+ "1752": 1.13639,
+ "1753": 1.14452,
+ "1754": 1.14424,
+ "1755": 1.14388,
+ "1756": 1.14572,
+ "1757": 1.17074,
+ "1758": 1.14596,
+ "1759": 1.14637,
+ "1760": 1.14576,
+ "1761": 1.1441,
+ "1762": 1.13385,
+ "1763": 1.13833,
+ "1764": 1.13995,
+ "1765": 1.14229,
+ "1766": 1.2706,
+ "1767": 1.15999,
+ "1768": 1.13873,
+ "1769": 1.1421,
+ "1770": 1.13078,
+ "1771": 1.13059,
+ "1772": 1.13076,
+ "1773": 1.13527,
+ "1774": 1.13153,
+ "1775": 1.1299,
+ "1776": 1.13144,
+ "1777": 1.13048,
+ "1778": 1.1312,
+ "1779": 1.13109,
+ "1780": 1.13227,
+ "1781": 1.1318,
+ "1782": 1.13195,
+ "1783": 1.13076,
+ "1784": 1.13371,
+ "1785": 1.13513,
+ "1786": 1.13544,
+ "1787": 1.13286,
+ "1788": 1.13114,
+ "1789": 1.12859,
+ "1790": 1.13136,
+ "1791": 1.13775,
+ "1792": 1.1401,
+ "1793": 1.13769,
+ "1794": 1.13564,
+ "1795": 1.13638,
+ "1796": 1.13621,
+ "1797": 1.13614,
+ "1798": 1.13707,
+ "1799": 1.13631,
+ "1800": 1.13547,
+ "1801": 1.13673,
+ "1802": 1.13706,
+ "1803": 1.13765,
+ "1804": 1.13506,
+ "1805": 1.13603,
+ "1806": 1.13717,
+ "1807": 1.13637,
+ "1808": 1.13841,
+ "1809": 1.13734,
+ "1810": 1.1379,
+ "1811": 1.13795,
+ "1812": 1.13826,
+ "1813": 1.13875,
+ "1814": 1.13885,
+ "1815": 1.13773,
+ "1816": 1.13726,
+ "1817": 1.14087,
+ "1818": 1.1378,
+ "1819": 1.13714,
+ "1820": 1.13737,
+ "1821": 1.13928,
+ "1822": 1.1371,
+ "1823": 1.13901,
+ "1824": 1.14485,
+ "1825": 1.12803,
+ "1826": 1.12264,
+ "1827": 1.12651,
+ "1828": 1.13421,
+ "1829": 1.13198,
+ "1830": 1.13242,
+ "1831": 1.13488,
+ "1832": 1.13287,
+ "1833": 1.13394,
+ "1834": 1.13403,
+ "1835": 1.13598,
+ "1836": 1.13357,
+ "1837": 1.13518,
+ "1838": 1.13404,
+ "1839": 1.13577,
+ "1840": 1.13254,
+ "1841": 1.13422,
+ "1842": 1.13496,
+ "1843": 1.135,
+ "1844": 1.13791,
+ "1845": 1.13082,
+ "1846": 1.13135,
+ "1847": 1.13026,
+ "1848": 1.13098,
+ "1849": 1.13032,
+ "1850": 1.13038,
+ "1851": 1.13107,
+ "1852": 1.13535,
+ "1853": 1.1311,
+ "1854": 1.13935,
+ "1855": 1.13148,
+ "1856": 1.13042,
+ "1857": 1.13238,
+ "1858": 1.13034,
+ "1859": 1.13083,
+ "1860": 1.13262,
+ "1861": 1.13117,
+ "1862": 1.13181,
+ "1863": 1.13237,
+ "1864": 1.13125,
+ "1865": 1.13519,
+ "1866": 1.14006,
+ "1867": 1.13476,
+ "1868": 1.13101,
+ "1869": 1.13227,
+ "1870": 1.13399,
+ "1871": 1.13455,
+ "1872": 1.13237,
+ "1873": 1.13088,
+ "1874": 1.13163,
+ "1875": 1.13336,
+ "1876": 1.13121,
+ "1877": 1.13209,
+ "1878": 1.13199,
+ "1879": 1.13177,
+ "1880": 1.13322,
+ "1881": 1.13141,
+ "1882": 1.13236,
+ "1883": 1.12859,
+ "1884": 1.12504,
+ "1885": 1.12493,
+ "1886": 1.12502,
+ "1887": 1.12484,
+ "1888": 1.1248,
+ "1889": 1.12719,
+ "1890": 1.13286,
+ "1891": 1.1293,
+ "1892": 1.13422,
+ "1893": 1.12646,
+ "1894": 1.12508,
+ "1895": 1.12422,
+ "1896": 1.12724,
+ "1897": 1.12903,
+ "1898": 1.13203,
+ "1899": 1.12741,
+ "1900": 1.12527,
+ "1901": 1.12359,
+ "1902": 1.12382,
+ "1903": 1.12536,
+ "1904": 1.12683,
+ "1905": 1.12606,
+ "1906": 1.12607,
+ "1907": 1.12626,
+ "1908": 1.44717,
+ "1909": 1.12543,
+ "1910": 1.12376,
+ "1911": 1.12429,
+ "1912": 1.12442,
+ "1913": 1.12355,
+ "1914": 1.12476,
+ "1915": 1.12331,
+ "1916": 1.12342,
+ "1917": 1.12442,
+ "1918": 1.12472,
+ "1919": 1.12536,
+ "1920": 1.12387,
+ "1921": 1.12347,
+ "1922": 1.12561,
+ "1923": 1.12391,
+ "1924": 1.12342,
+ "1925": 1.12607,
+ "1926": 1.12383,
+ "1927": 1.12305,
+ "1928": 1.125,
+ "1929": 1.12399,
+ "1930": 1.1237,
+ "1931": 1.12459,
+ "1932": 1.12475,
+ "1933": 1.12278,
+ "1934": 1.12413,
+ "1935": 1.12588,
+ "1936": 1.12473,
+ "1937": 1.12412,
+ "1938": 1.12444,
+ "1939": 1.12303,
+ "1940": 1.12421,
+ "1941": 1.12404,
+ "1942": 1.12568,
+ "1943": 1.12645,
+ "1944": 1.12388,
+ "1945": 1.44561,
+ "1946": 1.12748,
+ "1947": 1.44404,
+ "1948": 1.12309,
+ "1949": 1.12591,
+ "1950": 1.124,
+ "1951": 1.12953,
+ "1952": 1.12429,
+ "1953": 1.48105,
+ "1954": 1.12576,
+ "1955": 1.1274,
+ "1956": 1.12693,
+ "1957": 1.1261,
+ "1958": 1.1276,
+ "1959": 1.18913,
+ "1960": 1.12817,
+ "1961": 1.12615,
+ "1962": 1.12581,
+ "1963": 1.12682,
+ "1964": 1.12747,
+ "1965": 1.14301,
+ "1966": 1.14417,
+ "1967": 1.14427,
+ "1968": 1.14017,
+ "1969": 1.13872,
+ "1970": 1.13824,
+ "1971": 1.14731,
+ "1972": 1.13727,
+ "1973": 1.13816,
+ "1974": 1.13684,
+ "1975": 1.13985,
+ "1976": 1.13777,
+ "1977": 1.13833,
+ "1978": 1.14247,
+ "1979": 1.14554,
+ "1980": 1.14074,
+ "1981": 1.1396,
+ "1982": 1.13784,
+ "1983": 1.19896,
+ "1984": 1.13952,
+ "1985": 1.13865,
+ "1986": 1.13959,
+ "1987": 1.13909,
+ "1988": 1.13875,
+ "1989": 1.13947,
+ "1990": 1.13762,
+ "1991": 1.13799,
+ "1992": 1.13904,
+ "1993": 1.13674,
+ "1994": 1.13869,
+ "1995": 1.13884,
+ "1996": 1.13807,
+ "1997": 1.13986,
+ "1998": 1.14151,
+ "1999": 1.13582,
+ "2000": 1.16726
+ }
+ }
+}
\ No newline at end of file
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/golden_values_dev_dgx_h100.json
new file mode 100644
index 00000000000..b6e543e2cf8
--- /dev/null
+++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_dgx_h100_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap/golden_values_dev_dgx_h100.json
@@ -0,0 +1,10037 @@
+{
+ "lm loss": {
+ "start_step": 1,
+ "end_step": 2000,
+ "step_interval": 1,
+ "values": {
+ "1": 10.85229,
+ "2": 10.85951,
+ "3": 10.85469,
+ "4": 10.86843,
+ "5": 10.85304,
+ "6": 10.85362,
+ "7": 10.8602,
+ "8": 10.85298,
+ "9": 10.84874,
+ "10": 10.84674,
+ "11": 10.83863,
+ "12": 10.83549,
+ "13": 10.82524,
+ "14": 10.84078,
+ "15": 10.78613,
+ "16": 10.79372,
+ "17": 10.76553,
+ "18": 10.78902,
+ "19": 10.73057,
+ "20": 10.69489,
+ "21": 10.64595,
+ "22": 10.64791,
+ "23": 10.65524,
+ "24": 10.55349,
+ "25": 10.56424,
+ "26": 10.63262,
+ "27": 10.47084,
+ "28": 10.471,
+ "29": 10.36495,
+ "30": 10.27406,
+ "31": 10.43126,
+ "32": 10.35361,
+ "33": 10.22439,
+ "34": 10.17135,
+ "35": 10.23744,
+ "36": 10.15766,
+ "37": 10.10704,
+ "38": 10.03631,
+ "39": 10.04895,
+ "40": 10.06978,
+ "41": 9.95276,
+ "42": 9.95577,
+ "43": 9.87217,
+ "44": 9.99154,
+ "45": 10.00766,
+ "46": 9.84803,
+ "47": 10.00018,
+ "48": 9.81816,
+ "49": 9.94941,
+ "50": 9.94449,
+ "51": 9.5964,
+ "52": 9.79483,
+ "53": 9.63207,
+ "54": 9.8854,
+ "55": 9.74063,
+ "56": 9.85006,
+ "57": 9.86123,
+ "58": 9.87737,
+ "59": 9.54716,
+ "60": 9.64756,
+ "61": 9.87994,
+ "62": 9.76465,
+ "63": 9.68066,
+ "64": 9.82801,
+ "65": 9.59733,
+ "66": 9.62928,
+ "67": 9.74212,
+ "68": 9.60593,
+ "69": 9.29694,
+ "70": 9.42495,
+ "71": 9.79013,
+ "72": 9.71358,
+ "73": 9.61909,
+ "74": 9.45334,
+ "75": 9.24289,
+ "76": 9.50821,
+ "77": 9.57857,
+ "78": 9.56035,
+ "79": 9.31048,
+ "80": 9.36161,
+ "81": 9.46136,
+ "82": 9.55628,
+ "83": 9.53353,
+ "84": 9.35526,
+ "85": 9.40111,
+ "86": 9.65137,
+ "87": 9.23621,
+ "88": 9.48942,
+ "89": 9.22457,
+ "90": 9.41443,
+ "91": 9.39014,
+ "92": 9.3793,
+ "93": 9.36366,
+ "94": 9.51552,
+ "95": 9.42012,
+ "96": 9.33698,
+ "97": 9.20729,
+ "98": 9.49265,
+ "99": 9.29333,
+ "100": 9.35883,
+ "101": 9.24766,
+ "102": 9.24259,
+ "103": 9.07796,
+ "104": 9.16832,
+ "105": 9.37671,
+ "106": 9.15179,
+ "107": 9.17832,
+ "108": 9.31483,
+ "109": 9.28984,
+ "110": 9.36705,
+ "111": 9.17605,
+ "112": 9.23281,
+ "113": 9.35413,
+ "114": 9.35742,
+ "115": 9.32337,
+ "116": 9.00364,
+ "117": 9.06445,
+ "118": 9.06523,
+ "119": 9.22504,
+ "120": 9.08324,
+ "121": 9.19428,
+ "122": 9.14006,
+ "123": 9.25894,
+ "124": 9.45689,
+ "125": 9.21857,
+ "126": 9.0614,
+ "127": 9.01413,
+ "128": 9.22025,
+ "129": 8.98394,
+ "130": 9.14098,
+ "131": 9.15643,
+ "132": 9.03479,
+ "133": 8.86261,
+ "134": 9.18468,
+ "135": 8.88922,
+ "136": 9.1645,
+ "137": 9.15944,
+ "138": 9.23186,
+ "139": 9.08834,
+ "140": 8.87267,
+ "141": 9.29752,
+ "142": 9.19877,
+ "143": 9.12079,
+ "144": 9.24324,
+ "145": 9.10527,
+ "146": 8.98338,
+ "147": 8.9881,
+ "148": 9.1361,
+ "149": 9.06877,
+ "150": 9.01122,
+ "151": 8.93192,
+ "152": 8.87852,
+ "153": 9.06711,
+ "154": 9.1802,
+ "155": 9.13786,
+ "156": 9.05095,
+ "157": 9.15163,
+ "158": 9.05301,
+ "159": 9.03638,
+ "160": 8.89244,
+ "161": 9.04764,
+ "162": 8.89639,
+ "163": 8.84472,
+ "164": 8.97496,
+ "165": 8.93105,
+ "166": 8.65677,
+ "167": 8.83411,
+ "168": 8.8203,
+ "169": 8.65961,
+ "170": 9.04726,
+ "171": 8.72167,
+ "172": 8.82105,
+ "173": 8.91105,
+ "174": 8.85007,
+ "175": 8.70985,
+ "176": 8.7611,
+ "177": 8.76567,
+ "178": 8.72394,
+ "179": 8.64132,
+ "180": 8.74357,
+ "181": 8.6941,
+ "182": 8.72315,
+ "183": 9.08667,
+ "184": 8.60959,
+ "185": 8.88334,
+ "186": 8.74346,
+ "187": 8.57546,
+ "188": 8.6841,
+ "189": 8.86656,
+ "190": 8.53754,
+ "191": 8.66593,
+ "192": 8.61152,
+ "193": 8.5763,
+ "194": 8.75183,
+ "195": 8.5938,
+ "196": 8.7761,
+ "197": 8.744,
+ "198": 8.63042,
+ "199": 8.77202,
+ "200": 8.73627,
+ "201": 8.67068,
+ "202": 8.55099,
+ "203": 8.54134,
+ "204": 8.71213,
+ "205": 8.22486,
+ "206": 8.85986,
+ "207": 8.67928,
+ "208": 8.70826,
+ "209": 8.75243,
+ "210": 8.58226,
+ "211": 8.84167,
+ "212": 8.4913,
+ "213": 8.57316,
+ "214": 8.51316,
+ "215": 8.56549,
+ "216": 8.50617,
+ "217": 8.53369,
+ "218": 8.53635,
+ "219": 8.64298,
+ "220": 8.54526,
+ "221": 8.39761,
+ "222": 8.50474,
+ "223": 8.44078,
+ "224": 8.52901,
+ "225": 8.5708,
+ "226": 8.44247,
+ "227": 8.67823,
+ "228": 8.3859,
+ "229": 8.4537,
+ "230": 8.4985,
+ "231": 8.50257,
+ "232": 8.49898,
+ "233": 8.49438,
+ "234": 8.64018,
+ "235": 8.5617,
+ "236": 8.39791,
+ "237": 8.49075,
+ "238": 8.30637,
+ "239": 8.56099,
+ "240": 8.67125,
+ "241": 8.447,
+ "242": 8.47179,
+ "243": 8.51685,
+ "244": 8.36975,
+ "245": 8.59641,
+ "246": 8.59557,
+ "247": 8.43962,
+ "248": 8.50986,
+ "249": 8.52277,
+ "250": 8.42301,
+ "251": 8.3783,
+ "252": 8.54698,
+ "253": 8.3164,
+ "254": 8.35246,
+ "255": 8.29609,
+ "256": 8.20858,
+ "257": 8.39462,
+ "258": 8.45148,
+ "259": 8.23213,
+ "260": 8.24039,
+ "261": 8.23733,
+ "262": 8.34866,
+ "263": 8.30632,
+ "264": 8.1907,
+ "265": 8.33202,
+ "266": 8.2336,
+ "267": 7.9013,
+ "268": 8.37861,
+ "269": 8.40384,
+ "270": 8.26475,
+ "271": 8.27885,
+ "272": 8.31844,
+ "273": 8.13253,
+ "274": 8.09818,
+ "275": 8.00901,
+ "276": 7.92522,
+ "277": 8.23699,
+ "278": 8.04701,
+ "279": 7.96356,
+ "280": 7.75515,
+ "281": 8.10016,
+ "282": 8.14722,
+ "283": 8.15666,
+ "284": 8.10022,
+ "285": 8.06894,
+ "286": 7.90037,
+ "287": 7.99127,
+ "288": 8.24359,
+ "289": 8.17176,
+ "290": 8.12684,
+ "291": 8.25357,
+ "292": 8.0756,
+ "293": 8.11914,
+ "294": 7.97501,
+ "295": 7.96533,
+ "296": 8.23576,
+ "297": 7.79081,
+ "298": 8.04236,
+ "299": 7.93831,
+ "300": 7.8498,
+ "301": 8.00964,
+ "302": 7.94515,
+ "303": 7.99053,
+ "304": 7.95899,
+ "305": 7.9946,
+ "306": 7.9738,
+ "307": 7.98707,
+ "308": 7.9953,
+ "309": 8.0059,
+ "310": 7.97168,
+ "311": 7.92562,
+ "312": 7.88182,
+ "313": 7.82955,
+ "314": 7.82035,
+ "315": 7.82475,
+ "316": 7.74495,
+ "317": 7.92567,
+ "318": 7.97631,
+ "319": 7.82443,
+ "320": 7.563,
+ "321": 7.74534,
+ "322": 7.82917,
+ "323": 7.76703,
+ "324": 7.90668,
+ "325": 7.79387,
+ "326": 7.64901,
+ "327": 7.86137,
+ "328": 7.7832,
+ "329": 7.87669,
+ "330": 7.74815,
+ "331": 7.52005,
+ "332": 7.81037,
+ "333": 7.8379,
+ "334": 7.67759,
+ "335": 7.69435,
+ "336": 7.90998,
+ "337": 7.64618,
+ "338": 7.89178,
+ "339": 7.7192,
+ "340": 7.75318,
+ "341": 7.70375,
+ "342": 7.81451,
+ "343": 7.61028,
+ "344": 7.58433,
+ "345": 7.60474,
+ "346": 7.45825,
+ "347": 7.55021,
+ "348": 7.67669,
+ "349": 7.57925,
+ "350": 7.65118,
+ "351": 7.74172,
+ "352": 7.69877,
+ "353": 7.4955,
+ "354": 7.73645,
+ "355": 7.75823,
+ "356": 7.76871,
+ "357": 7.8083,
+ "358": 7.59223,
+ "359": 7.54129,
+ "360": 7.62161,
+ "361": 7.53913,
+ "362": 7.75707,
+ "363": 7.58184,
+ "364": 7.57393,
+ "365": 7.61381,
+ "366": 7.30007,
+ "367": 7.55433,
+ "368": 7.4381,
+ "369": 7.34072,
+ "370": 7.45786,
+ "371": 7.45479,
+ "372": 7.64528,
+ "373": 7.51803,
+ "374": 7.43579,
+ "375": 7.52279,
+ "376": 7.33856,
+ "377": 7.23275,
+ "378": 7.53208,
+ "379": 7.48549,
+ "380": 7.37893,
+ "381": 7.46259,
+ "382": 7.28593,
+ "383": 7.26774,
+ "384": 7.4035,
+ "385": 7.38617,
+ "386": 7.2246,
+ "387": 7.41197,
+ "388": 7.27354,
+ "389": 7.42884,
+ "390": 7.23295,
+ "391": 7.63854,
+ "392": 7.32743,
+ "393": 7.41119,
+ "394": 7.46811,
+ "395": 7.43164,
+ "396": 7.27624,
+ "397": 7.22237,
+ "398": 7.41314,
+ "399": 7.14965,
+ "400": 7.28882,
+ "401": 7.34645,
+ "402": 7.38389,
+ "403": 7.27445,
+ "404": 7.29549,
+ "405": 7.25441,
+ "406": 7.20955,
+ "407": 7.35305,
+ "408": 7.17476,
+ "409": 7.15738,
+ "410": 7.30843,
+ "411": 7.21046,
+ "412": 7.19143,
+ "413": 7.22421,
+ "414": 6.90584,
+ "415": 7.32329,
+ "416": 7.41955,
+ "417": 7.01436,
+ "418": 7.26656,
+ "419": 7.03251,
+ "420": 7.40294,
+ "421": 7.17304,
+ "422": 7.22884,
+ "423": 7.08611,
+ "424": 7.2354,
+ "425": 7.3087,
+ "426": 7.28003,
+ "427": 7.12262,
+ "428": 7.08425,
+ "429": 6.87125,
+ "430": 7.19779,
+ "431": 6.99763,
+ "432": 7.22298,
+ "433": 6.96906,
+ "434": 6.95232,
+ "435": 7.01097,
+ "436": 7.00141,
+ "437": 6.9848,
+ "438": 6.99447,
+ "439": 6.93128,
+ "440": 7.05472,
+ "441": 7.03406,
+ "442": 7.09324,
+ "443": 7.0854,
+ "444": 6.69941,
+ "445": 6.98741,
+ "446": 7.13474,
+ "447": 7.11726,
+ "448": 6.97509,
+ "449": 7.04203,
+ "450": 7.00855,
+ "451": 6.82317,
+ "452": 6.90281,
+ "453": 7.00796,
+ "454": 6.96028,
+ "455": 7.02393,
+ "456": 6.98781,
+ "457": 6.96156,
+ "458": 6.89735,
+ "459": 6.68323,
+ "460": 7.05439,
+ "461": 7.088,
+ "462": 6.86315,
+ "463": 7.04576,
+ "464": 6.64275,
+ "465": 7.02272,
+ "466": 6.99895,
+ "467": 6.99097,
+ "468": 6.94728,
+ "469": 6.82004,
+ "470": 7.0355,
+ "471": 6.87321,
+ "472": 6.95214,
+ "473": 6.81396,
+ "474": 6.96547,
+ "475": 7.1584,
+ "476": 6.75391,
+ "477": 6.88861,
+ "478": 6.89832,
+ "479": 6.69636,
+ "480": 7.01803,
+ "481": 6.98503,
+ "482": 6.72248,
+ "483": 6.77484,
+ "484": 6.74297,
+ "485": 6.92045,
+ "486": 7.05544,
+ "487": 6.62222,
+ "488": 6.87375,
+ "489": 6.76024,
+ "490": 6.81377,
+ "491": 6.69837,
+ "492": 6.68149,
+ "493": 6.75646,
+ "494": 6.66282,
+ "495": 6.62263,
+ "496": 6.57706,
+ "497": 6.8292,
+ "498": 6.63548,
+ "499": 6.84385,
+ "500": 6.64283,
+ "501": 6.71966,
+ "502": 6.82988,
+ "503": 6.69833,
+ "504": 6.60751,
+ "505": 6.6112,
+ "506": 6.73586,
+ "507": 6.85391,
+ "508": 6.84629,
+ "509": 6.6384,
+ "510": 6.81034,
+ "511": 6.72977,
+ "512": 6.72804,
+ "513": 6.64821,
+ "514": 6.70064,
+ "515": 6.43824,
+ "516": 6.73421,
+ "517": 6.69542,
+ "518": 6.52993,
+ "519": 6.62474,
+ "520": 6.84935,
+ "521": 6.65329,
+ "522": 6.6979,
+ "523": 6.73262,
+ "524": 6.72634,
+ "525": 6.6655,
+ "526": 6.40663,
+ "527": 6.79088,
+ "528": 6.65206,
+ "529": 6.62295,
+ "530": 6.61639,
+ "531": 6.63503,
+ "532": 6.62382,
+ "533": 6.75435,
+ "534": 6.60296,
+ "535": 6.74138,
+ "536": 6.61812,
+ "537": 6.63086,
+ "538": 6.52418,
+ "539": 6.54299,
+ "540": 6.57593,
+ "541": 6.44382,
+ "542": 6.66189,
+ "543": 6.67325,
+ "544": 6.66927,
+ "545": 6.80511,
+ "546": 6.6246,
+ "547": 6.40979,
+ "548": 6.71663,
+ "549": 6.68986,
+ "550": 6.51987,
+ "551": 6.74092,
+ "552": 6.63227,
+ "553": 6.47534,
+ "554": 6.62778,
+ "555": 6.45222,
+ "556": 6.60749,
+ "557": 6.62431,
+ "558": 6.37676,
+ "559": 6.36118,
+ "560": 6.5756,
+ "561": 6.72381,
+ "562": 6.62768,
+ "563": 6.73287,
+ "564": 6.34176,
+ "565": 6.50706,
+ "566": 6.6902,
+ "567": 6.55838,
+ "568": 6.50084,
+ "569": 6.44415,
+ "570": 6.35619,
+ "571": 6.62259,
+ "572": 6.30471,
+ "573": 6.5721,
+ "574": 6.46259,
+ "575": 6.63541,
+ "576": 6.50701,
+ "577": 6.51656,
+ "578": 6.47574,
+ "579": 6.45618,
+ "580": 6.5583,
+ "581": 6.59714,
+ "582": 6.46959,
+ "583": 6.50413,
+ "584": 6.51087,
+ "585": 6.41424,
+ "586": 6.40258,
+ "587": 6.4501,
+ "588": 6.55622,
+ "589": 6.61456,
+ "590": 6.27891,
+ "591": 6.66415,
+ "592": 6.2545,
+ "593": 6.46521,
+ "594": 6.37467,
+ "595": 6.34819,
+ "596": 6.25003,
+ "597": 6.18054,
+ "598": 6.44279,
+ "599": 6.38602,
+ "600": 6.44414,
+ "601": 6.25051,
+ "602": 6.51804,
+ "603": 6.50819,
+ "604": 6.37382,
+ "605": 6.48026,
+ "606": 6.3013,
+ "607": 6.51999,
+ "608": 6.66049,
+ "609": 6.16075,
+ "610": 6.55805,
+ "611": 6.38737,
+ "612": 6.56702,
+ "613": 6.41056,
+ "614": 6.18827,
+ "615": 6.38286,
+ "616": 6.34421,
+ "617": 6.36273,
+ "618": 6.43626,
+ "619": 6.12502,
+ "620": 6.3943,
+ "621": 6.44427,
+ "622": 6.38402,
+ "623": 6.56769,
+ "624": 6.34417,
+ "625": 6.26521,
+ "626": 6.28634,
+ "627": 6.4276,
+ "628": 6.24043,
+ "629": 6.57298,
+ "630": 6.3523,
+ "631": 6.33431,
+ "632": 6.29554,
+ "633": 6.24213,
+ "634": 6.29476,
+ "635": 6.53142,
+ "636": 6.23005,
+ "637": 6.62121,
+ "638": 6.00686,
+ "639": 6.26506,
+ "640": 6.2796,
+ "641": 6.19435,
+ "642": 6.27007,
+ "643": 6.44413,
+ "644": 6.2445,
+ "645": 6.23092,
+ "646": 6.38932,
+ "647": 6.3209,
+ "648": 6.34188,
+ "649": 6.33297,
+ "650": 6.47025,
+ "651": 6.31782,
+ "652": 6.23993,
+ "653": 6.36817,
+ "654": 6.43495,
+ "655": 6.5135,
+ "656": 6.31371,
+ "657": 6.4163,
+ "658": 6.22993,
+ "659": 6.1432,
+ "660": 6.3808,
+ "661": 6.15725,
+ "662": 6.2613,
+ "663": 6.36151,
+ "664": 6.32043,
+ "665": 6.39194,
+ "666": 6.15182,
+ "667": 6.18562,
+ "668": 6.22741,
+ "669": 6.20408,
+ "670": 6.23602,
+ "671": 6.22904,
+ "672": 6.47492,
+ "673": 6.32812,
+ "674": 6.28343,
+ "675": 6.37362,
+ "676": 6.38018,
+ "677": 6.29511,
+ "678": 6.26804,
+ "679": 6.22803,
+ "680": 6.28357,
+ "681": 6.19077,
+ "682": 6.07906,
+ "683": 6.26403,
+ "684": 6.31575,
+ "685": 6.2874,
+ "686": 6.14011,
+ "687": 6.27685,
+ "688": 6.19835,
+ "689": 6.61075,
+ "690": 6.16856,
+ "691": 6.17286,
+ "692": 6.2649,
+ "693": 6.13689,
+ "694": 6.22553,
+ "695": 6.31786,
+ "696": 6.1061,
+ "697": 6.14556,
+ "698": 6.21959,
+ "699": 6.45326,
+ "700": 6.03519,
+ "701": 6.05302,
+ "702": 6.23703,
+ "703": 6.17441,
+ "704": 6.20621,
+ "705": 6.11844,
+ "706": 6.06567,
+ "707": 6.24456,
+ "708": 6.30245,
+ "709": 5.99551,
+ "710": 6.15229,
+ "711": 6.2479,
+ "712": 6.17146,
+ "713": 5.88608,
+ "714": 6.09975,
+ "715": 6.10497,
+ "716": 6.40586,
+ "717": 6.18363,
+ "718": 6.23537,
+ "719": 6.26862,
+ "720": 6.25804,
+ "721": 6.25605,
+ "722": 6.22472,
+ "723": 6.07187,
+ "724": 6.22017,
+ "725": 6.0314,
+ "726": 6.29244,
+ "727": 6.00644,
+ "728": 6.03616,
+ "729": 6.0826,
+ "730": 6.17412,
+ "731": 6.09163,
+ "732": 6.07888,
+ "733": 6.11348,
+ "734": 6.37763,
+ "735": 6.26791,
+ "736": 6.17709,
+ "737": 6.36077,
+ "738": 6.13247,
+ "739": 6.14636,
+ "740": 5.87836,
+ "741": 6.00499,
+ "742": 5.98594,
+ "743": 6.17515,
+ "744": 6.02317,
+ "745": 6.14565,
+ "746": 6.03122,
+ "747": 6.09452,
+ "748": 6.22864,
+ "749": 5.93308,
+ "750": 6.16381,
+ "751": 5.95292,
+ "752": 6.01389,
+ "753": 6.02392,
+ "754": 6.28379,
+ "755": 6.12598,
+ "756": 6.2443,
+ "757": 6.01404,
+ "758": 6.19738,
+ "759": 6.22084,
+ "760": 6.02115,
+ "761": 6.1856,
+ "762": 6.21798,
+ "763": 6.02971,
+ "764": 5.95856,
+ "765": 5.92315,
+ "766": 5.96127,
+ "767": 5.81063,
+ "768": 6.18012,
+ "769": 6.27004,
+ "770": 6.28915,
+ "771": 5.78425,
+ "772": 6.0231,
+ "773": 6.17908,
+ "774": 5.87868,
+ "775": 6.02111,
+ "776": 6.12258,
+ "777": 5.875,
+ "778": 6.04901,
+ "779": 5.86583,
+ "780": 6.13275,
+ "781": 5.8451,
+ "782": 6.03644,
+ "783": 5.94982,
+ "784": 5.91239,
+ "785": 6.08718,
+ "786": 6.0949,
+ "787": 5.6498,
+ "788": 5.99117,
+ "789": 6.20208,
+ "790": 6.25533,
+ "791": 5.78584,
+ "792": 5.98398,
+ "793": 6.17232,
+ "794": 6.02303,
+ "795": 5.99758,
+ "796": 6.15575,
+ "797": 6.04799,
+ "798": 6.04773,
+ "799": 6.10394,
+ "800": 6.00523,
+ "801": 6.13976,
+ "802": 5.97143,
+ "803": 6.14303,
+ "804": 5.99897,
+ "805": 5.8162,
+ "806": 6.08016,
+ "807": 6.03933,
+ "808": 5.91779,
+ "809": 5.76774,
+ "810": 6.00748,
+ "811": 5.92407,
+ "812": 5.89853,
+ "813": 5.95603,
+ "814": 6.0199,
+ "815": 5.80113,
+ "816": 6.10732,
+ "817": 5.92704,
+ "818": 6.05349,
+ "819": 5.99954,
+ "820": 5.71925,
+ "821": 5.93871,
+ "822": 6.18742,
+ "823": 5.82051,
+ "824": 5.97479,
+ "825": 6.17898,
+ "826": 6.18992,
+ "827": 6.04811,
+ "828": 6.0618,
+ "829": 5.8808,
+ "830": 5.9338,
+ "831": 5.89066,
+ "832": 5.95946,
+ "833": 6.05775,
+ "834": 5.98694,
+ "835": 5.99225,
+ "836": 5.78808,
+ "837": 6.1001,
+ "838": 5.85774,
+ "839": 5.82603,
+ "840": 6.17451,
+ "841": 5.77389,
+ "842": 5.88244,
+ "843": 5.93827,
+ "844": 6.0037,
+ "845": 6.08214,
+ "846": 5.68388,
+ "847": 5.75348,
+ "848": 5.96075,
+ "849": 6.0909,
+ "850": 5.83839,
+ "851": 6.01221,
+ "852": 5.74277,
+ "853": 5.9819,
+ "854": 6.00994,
+ "855": 5.81104,
+ "856": 5.99027,
+ "857": 5.99462,
+ "858": 6.04349,
+ "859": 5.94378,
+ "860": 6.08776,
+ "861": 6.05806,
+ "862": 5.99259,
+ "863": 5.83184,
+ "864": 5.83727,
+ "865": 5.93014,
+ "866": 5.88373,
+ "867": 5.87071,
+ "868": 6.0603,
+ "869": 6.08011,
+ "870": 5.96321,
+ "871": 6.03762,
+ "872": 5.89053,
+ "873": 5.83933,
+ "874": 6.02181,
+ "875": 5.90658,
+ "876": 5.96303,
+ "877": 5.92074,
+ "878": 6.09702,
+ "879": 5.76213,
+ "880": 6.0073,
+ "881": 5.98795,
+ "882": 5.90217,
+ "883": 5.67039,
+ "884": 5.95748,
+ "885": 5.74054,
+ "886": 5.98445,
+ "887": 5.90648,
+ "888": 5.8314,
+ "889": 6.00733,
+ "890": 6.01123,
+ "891": 5.94286,
+ "892": 5.70277,
+ "893": 6.08459,
+ "894": 5.72165,
+ "895": 5.83588,
+ "896": 5.83978,
+ "897": 5.84943,
+ "898": 5.92347,
+ "899": 5.93201,
+ "900": 5.8958,
+ "901": 5.94689,
+ "902": 5.82987,
+ "903": 6.04738,
+ "904": 5.92586,
+ "905": 5.89894,
+ "906": 5.61575,
+ "907": 5.90522,
+ "908": 5.73333,
+ "909": 5.98526,
+ "910": 5.85686,
+ "911": 5.69844,
+ "912": 5.69856,
+ "913": 5.76407,
+ "914": 5.82436,
+ "915": 5.79681,
+ "916": 5.88608,
+ "917": 5.867,
+ "918": 5.8166,
+ "919": 5.80848,
+ "920": 5.88971,
+ "921": 5.8407,
+ "922": 5.62064,
+ "923": 6.03383,
+ "924": 5.60482,
+ "925": 5.61823,
+ "926": 5.85786,
+ "927": 5.95554,
+ "928": 5.83872,
+ "929": 5.82237,
+ "930": 5.95411,
+ "931": 5.75622,
+ "932": 5.59098,
+ "933": 5.63134,
+ "934": 5.80496,
+ "935": 5.63538,
+ "936": 5.8317,
+ "937": 5.96485,
+ "938": 5.58943,
+ "939": 5.79158,
+ "940": 5.96089,
+ "941": 5.72676,
+ "942": 5.83595,
+ "943": 5.87091,
+ "944": 5.95881,
+ "945": 5.70173,
+ "946": 5.55832,
+ "947": 5.74676,
+ "948": 5.79172,
+ "949": 5.82702,
+ "950": 5.84636,
+ "951": 5.72232,
+ "952": 5.6926,
+ "953": 5.67846,
+ "954": 5.72814,
+ "955": 5.52701,
+ "956": 5.6247,
+ "957": 5.84082,
+ "958": 5.79725,
+ "959": 5.57236,
+ "960": 5.8033,
+ "961": 5.83318,
+ "962": 5.76931,
+ "963": 5.768,
+ "964": 5.70825,
+ "965": 5.63755,
+ "966": 5.60344,
+ "967": 5.72795,
+ "968": 5.74037,
+ "969": 5.82565,
+ "970": 5.64868,
+ "971": 5.70857,
+ "972": 5.85255,
+ "973": 5.67308,
+ "974": 5.7177,
+ "975": 5.86027,
+ "976": 5.71074,
+ "977": 5.77363,
+ "978": 5.68598,
+ "979": 5.5901,
+ "980": 5.76431,
+ "981": 5.89808,
+ "982": 5.47164,
+ "983": 5.61909,
+ "984": 5.54693,
+ "985": 5.58914,
+ "986": 5.6395,
+ "987": 5.57215,
+ "988": 5.71212,
+ "989": 5.69568,
+ "990": 5.62713,
+ "991": 5.85071,
+ "992": 5.77178,
+ "993": 5.87182,
+ "994": 5.69827,
+ "995": 5.7311,
+ "996": 5.73947,
+ "997": 5.81776,
+ "998": 5.83946,
+ "999": 5.83213,
+ "1000": 5.68618,
+ "1001": 5.86902,
+ "1002": 5.75759,
+ "1003": 5.64206,
+ "1004": 5.80056,
+ "1005": 5.53357,
+ "1006": 5.3287,
+ "1007": 5.7697,
+ "1008": 5.79391,
+ "1009": 5.65438,
+ "1010": 5.78459,
+ "1011": 5.89696,
+ "1012": 5.62269,
+ "1013": 5.61367,
+ "1014": 5.67992,
+ "1015": 5.56146,
+ "1016": 5.87263,
+ "1017": 5.83169,
+ "1018": 5.62357,
+ "1019": 5.73336,
+ "1020": 5.61404,
+ "1021": 5.85353,
+ "1022": 5.49696,
+ "1023": 5.65062,
+ "1024": 5.74334,
+ "1025": 5.57222,
+ "1026": 5.40994,
+ "1027": 5.59905,
+ "1028": 5.68935,
+ "1029": 5.68346,
+ "1030": 5.68799,
+ "1031": 5.40526,
+ "1032": 5.78443,
+ "1033": 5.57561,
+ "1034": 5.6274,
+ "1035": 5.71529,
+ "1036": 5.62368,
+ "1037": 5.36621,
+ "1038": 5.66561,
+ "1039": 5.6477,
+ "1040": 5.57324,
+ "1041": 5.59731,
+ "1042": 5.81493,
+ "1043": 5.56271,
+ "1044": 5.46406,
+ "1045": 5.9683,
+ "1046": 5.48617,
+ "1047": 5.39181,
+ "1048": 5.49562,
+ "1049": 5.67791,
+ "1050": 5.69881,
+ "1051": 5.5776,
+ "1052": 5.68149,
+ "1053": 5.63114,
+ "1054": 5.45857,
+ "1055": 5.59887,
+ "1056": 5.67508,
+ "1057": 5.75628,
+ "1058": 5.56524,
+ "1059": 5.74843,
+ "1060": 5.82162,
+ "1061": 5.47233,
+ "1062": 5.65043,
+ "1063": 5.50248,
+ "1064": 5.59125,
+ "1065": 5.55564,
+ "1066": 5.74466,
+ "1067": 5.67043,
+ "1068": 5.44061,
+ "1069": 5.61122,
+ "1070": 5.81207,
+ "1071": 5.51069,
+ "1072": 5.62291,
+ "1073": 5.6192,
+ "1074": 5.52379,
+ "1075": 5.70748,
+ "1076": 5.5951,
+ "1077": 5.70681,
+ "1078": 5.56223,
+ "1079": 5.61677,
+ "1080": 5.64259,
+ "1081": 5.62201,
+ "1082": 5.50149,
+ "1083": 5.64213,
+ "1084": 5.55087,
+ "1085": 5.40393,
+ "1086": 5.62042,
+ "1087": 5.44171,
+ "1088": 5.51111,
+ "1089": 5.76887,
+ "1090": 5.52736,
+ "1091": 5.51307,
+ "1092": 5.40781,
+ "1093": 5.69672,
+ "1094": 5.56925,
+ "1095": 5.5731,
+ "1096": 5.61367,
+ "1097": 5.6454,
+ "1098": 5.65292,
+ "1099": 5.51436,
+ "1100": 5.63973,
+ "1101": 5.67989,
+ "1102": 5.53567,
+ "1103": 5.54943,
+ "1104": 5.53818,
+ "1105": 5.55271,
+ "1106": 5.68243,
+ "1107": 5.68309,
+ "1108": 5.78112,
+ "1109": 5.54014,
+ "1110": 5.6617,
+ "1111": 5.59215,
+ "1112": 5.58702,
+ "1113": 5.62687,
+ "1114": 5.61504,
+ "1115": 5.59863,
+ "1116": 5.66461,
+ "1117": 5.64732,
+ "1118": 5.65418,
+ "1119": 5.70846,
+ "1120": 5.63501,
+ "1121": 5.37809,
+ "1122": 5.23308,
+ "1123": 5.47298,
+ "1124": 5.65454,
+ "1125": 5.68419,
+ "1126": 5.68674,
+ "1127": 5.56954,
+ "1128": 5.62438,
+ "1129": 5.29406,
+ "1130": 5.54548,
+ "1131": 5.6238,
+ "1132": 5.72077,
+ "1133": 5.51615,
+ "1134": 5.55302,
+ "1135": 5.51992,
+ "1136": 5.42021,
+ "1137": 5.46757,
+ "1138": 5.5657,
+ "1139": 5.41524,
+ "1140": 5.26144,
+ "1141": 5.58424,
+ "1142": 5.64054,
+ "1143": 5.385,
+ "1144": 5.3823,
+ "1145": 5.36615,
+ "1146": 5.62886,
+ "1147": 5.49181,
+ "1148": 5.50478,
+ "1149": 5.51839,
+ "1150": 5.39997,
+ "1151": 5.5553,
+ "1152": 5.42174,
+ "1153": 5.4602,
+ "1154": 5.50372,
+ "1155": 5.44072,
+ "1156": 5.34868,
+ "1157": 5.66217,
+ "1158": 5.39889,
+ "1159": 5.33332,
+ "1160": 5.79511,
+ "1161": 5.53597,
+ "1162": 5.45589,
+ "1163": 5.52529,
+ "1164": 5.38319,
+ "1165": 5.52473,
+ "1166": 5.48721,
+ "1167": 5.36058,
+ "1168": 5.49334,
+ "1169": 5.40387,
+ "1170": 5.58667,
+ "1171": 5.48535,
+ "1172": 5.64049,
+ "1173": 5.62012,
+ "1174": 5.51308,
+ "1175": 5.34473,
+ "1176": 5.38256,
+ "1177": 5.55838,
+ "1178": 5.46714,
+ "1179": 5.49373,
+ "1180": 5.46571,
+ "1181": 5.55314,
+ "1182": 5.59825,
+ "1183": 5.76884,
+ "1184": 5.54748,
+ "1185": 5.28691,
+ "1186": 5.60427,
+ "1187": 5.55401,
+ "1188": 5.51546,
+ "1189": 5.38634,
+ "1190": 5.40233,
+ "1191": 5.38976,
+ "1192": 5.49689,
+ "1193": 5.46486,
+ "1194": 5.45443,
+ "1195": 5.32542,
+ "1196": 5.52268,
+ "1197": 5.47666,
+ "1198": 5.52589,
+ "1199": 5.38688,
+ "1200": 5.33164,
+ "1201": 5.49012,
+ "1202": 5.43748,
+ "1203": 5.49375,
+ "1204": 5.40666,
+ "1205": 5.48999,
+ "1206": 5.33478,
+ "1207": 5.58651,
+ "1208": 5.42414,
+ "1209": 5.2931,
+ "1210": 5.49969,
+ "1211": 5.5071,
+ "1212": 5.59732,
+ "1213": 5.41745,
+ "1214": 5.49785,
+ "1215": 5.23706,
+ "1216": 5.41194,
+ "1217": 5.38264,
+ "1218": 5.4506,
+ "1219": 5.48501,
+ "1220": 5.38351,
+ "1221": 5.4519,
+ "1222": 5.31254,
+ "1223": 5.47747,
+ "1224": 5.41418,
+ "1225": 5.42845,
+ "1226": 5.32249,
+ "1227": 5.47547,
+ "1228": 5.73249,
+ "1229": 5.32716,
+ "1230": 5.41211,
+ "1231": 5.07649,
+ "1232": 5.78792,
+ "1233": 5.28531,
+ "1234": 5.24399,
+ "1235": 5.36824,
+ "1236": 5.47881,
+ "1237": 5.20655,
+ "1238": 5.41404,
+ "1239": 5.40719,
+ "1240": 5.46621,
+ "1241": 5.57221,
+ "1242": 5.45465,
+ "1243": 5.43424,
+ "1244": 5.51633,
+ "1245": 5.19115,
+ "1246": 5.71566,
+ "1247": 5.43,
+ "1248": 5.29843,
+ "1249": 5.40246,
+ "1250": 5.34088,
+ "1251": 5.41904,
+ "1252": 5.57108,
+ "1253": 5.489,
+ "1254": 5.31099,
+ "1255": 5.51387,
+ "1256": 5.60708,
+ "1257": 5.42325,
+ "1258": 5.55956,
+ "1259": 5.47585,
+ "1260": 5.50779,
+ "1261": 5.63801,
+ "1262": 5.39496,
+ "1263": 5.32432,
+ "1264": 5.50348,
+ "1265": 5.30656,
+ "1266": 5.23675,
+ "1267": 5.37031,
+ "1268": 5.38615,
+ "1269": 5.14823,
+ "1270": 5.39882,
+ "1271": 5.27753,
+ "1272": 5.52297,
+ "1273": 5.29632,
+ "1274": 5.34638,
+ "1275": 5.37784,
+ "1276": 5.3975,
+ "1277": 5.4606,
+ "1278": 5.35501,
+ "1279": 5.43897,
+ "1280": 5.45708,
+ "1281": 5.4056,
+ "1282": 5.38482,
+ "1283": 5.42347,
+ "1284": 5.34377,
+ "1285": 5.50505,
+ "1286": 5.33544,
+ "1287": 5.58814,
+ "1288": 5.2615,
+ "1289": 5.42995,
+ "1290": 5.49991,
+ "1291": 5.49987,
+ "1292": 5.44631,
+ "1293": 5.4171,
+ "1294": 5.49492,
+ "1295": 5.34499,
+ "1296": 5.18358,
+ "1297": 5.16726,
+ "1298": 5.11761,
+ "1299": 5.30129,
+ "1300": 5.21142,
+ "1301": 5.30283,
+ "1302": 5.27612,
+ "1303": 5.35547,
+ "1304": 5.43158,
+ "1305": 5.36825,
+ "1306": 5.25293,
+ "1307": 5.19217,
+ "1308": 5.27071,
+ "1309": 5.40774,
+ "1310": 5.26053,
+ "1311": 5.37774,
+ "1312": 5.35324,
+ "1313": 5.29428,
+ "1314": 5.29224,
+ "1315": 5.41906,
+ "1316": 5.25856,
+ "1317": 5.27981,
+ "1318": 5.21136,
+ "1319": 5.34401,
+ "1320": 5.4177,
+ "1321": 5.44957,
+ "1322": 5.46219,
+ "1323": 5.37269,
+ "1324": 5.24973,
+ "1325": 5.40538,
+ "1326": 5.53891,
+ "1327": 5.38638,
+ "1328": 5.21164,
+ "1329": 5.41667,
+ "1330": 5.39695,
+ "1331": 5.30979,
+ "1332": 5.3112,
+ "1333": 5.36823,
+ "1334": 5.44451,
+ "1335": 5.36788,
+ "1336": 5.43552,
+ "1337": 5.46933,
+ "1338": 5.30246,
+ "1339": 5.1362,
+ "1340": 5.41205,
+ "1341": 5.34033,
+ "1342": 5.35625,
+ "1343": 5.47387,
+ "1344": 5.37842,
+ "1345": 5.34238,
+ "1346": 5.07927,
+ "1347": 5.38404,
+ "1348": 5.49312,
+ "1349": 5.40746,
+ "1350": 5.02698,
+ "1351": 5.31566,
+ "1352": 5.15947,
+ "1353": 5.3409,
+ "1354": 5.35878,
+ "1355": 5.11364,
+ "1356": 5.25842,
+ "1357": 5.28929,
+ "1358": 5.15831,
+ "1359": 5.10775,
+ "1360": 5.17385,
+ "1361": 5.30604,
+ "1362": 5.06672,
+ "1363": 5.29722,
+ "1364": 5.3953,
+ "1365": 5.01953,
+ "1366": 5.1147,
+ "1367": 5.33054,
+ "1368": 5.18248,
+ "1369": 5.22391,
+ "1370": 5.1961,
+ "1371": 5.27906,
+ "1372": 5.25988,
+ "1373": 5.28404,
+ "1374": 5.2779,
+ "1375": 5.46001,
+ "1376": 5.26713,
+ "1377": 5.26807,
+ "1378": 5.31427,
+ "1379": 5.22765,
+ "1380": 5.25807,
+ "1381": 5.47919,
+ "1382": 5.08739,
+ "1383": 5.37543,
+ "1384": 5.36108,
+ "1385": 5.39028,
+ "1386": 5.16582,
+ "1387": 5.16244,
+ "1388": 5.27616,
+ "1389": 5.30262,
+ "1390": 5.25131,
+ "1391": 5.26406,
+ "1392": 5.36794,
+ "1393": 5.37824,
+ "1394": 5.40104,
+ "1395": 5.32383,
+ "1396": 5.21137,
+ "1397": 5.2828,
+ "1398": 5.36587,
+ "1399": 5.35557,
+ "1400": 5.26522,
+ "1401": 5.35981,
+ "1402": 5.42507,
+ "1403": 5.19768,
+ "1404": 5.27957,
+ "1405": 5.11754,
+ "1406": 4.98933,
+ "1407": 5.39818,
+ "1408": 5.1921,
+ "1409": 5.39429,
+ "1410": 5.37153,
+ "1411": 4.91585,
+ "1412": 5.35244,
+ "1413": 5.41055,
+ "1414": 5.21699,
+ "1415": 5.44044,
+ "1416": 5.32598,
+ "1417": 5.39078,
+ "1418": 5.29894,
+ "1419": 5.31316,
+ "1420": 5.43638,
+ "1421": 5.39683,
+ "1422": 5.41859,
+ "1423": 4.99867,
+ "1424": 5.33177,
+ "1425": 5.58491,
+ "1426": 5.23068,
+ "1427": 5.31742,
+ "1428": 5.33463,
+ "1429": 5.07871,
+ "1430": 5.32748,
+ "1431": 5.32237,
+ "1432": 5.34216,
+ "1433": 5.18496,
+ "1434": 5.16175,
+ "1435": 5.20122,
+ "1436": 5.10715,
+ "1437": 5.22566,
+ "1438": 5.31423,
+ "1439": 5.34769,
+ "1440": 5.34295,
+ "1441": 5.16777,
+ "1442": 5.21935,
+ "1443": 5.20553,
+ "1444": 5.12984,
+ "1445": 5.07414,
+ "1446": 5.26456,
+ "1447": 5.25775,
+ "1448": 5.29302,
+ "1449": 5.24616,
+ "1450": 5.34316,
+ "1451": 5.07004,
+ "1452": 5.26796,
+ "1453": 5.1741,
+ "1454": 5.01458,
+ "1455": 5.12771,
+ "1456": 5.27213,
+ "1457": 5.1882,
+ "1458": 5.00695,
+ "1459": 5.2215,
+ "1460": 5.23955,
+ "1461": 5.08,
+ "1462": 4.97269,
+ "1463": 5.15114,
+ "1464": 5.22113,
+ "1465": 5.27344,
+ "1466": 5.36076,
+ "1467": 5.34631,
+ "1468": 5.2303,
+ "1469": 5.05117,
+ "1470": 5.12322,
+ "1471": 5.25302,
+ "1472": 5.12175,
+ "1473": 5.10167,
+ "1474": 5.21744,
+ "1475": 5.18613,
+ "1476": 5.15517,
+ "1477": 5.26215,
+ "1478": 5.30407,
+ "1479": 5.01063,
+ "1480": 5.182,
+ "1481": 5.25124,
+ "1482": 5.3494,
+ "1483": 5.27058,
+ "1484": 4.92644,
+ "1485": 5.29103,
+ "1486": 5.04435,
+ "1487": 4.88432,
+ "1488": 5.18325,
+ "1489": 5.10139,
+ "1490": 5.04545,
+ "1491": 5.3188,
+ "1492": 5.22283,
+ "1493": 4.94061,
+ "1494": 5.10891,
+ "1495": 5.13402,
+ "1496": 5.05779,
+ "1497": 5.36536,
+ "1498": 5.30609,
+ "1499": 5.143,
+ "1500": 5.09554,
+ "1501": 5.0349,
+ "1502": 5.15423,
+ "1503": 5.43131,
+ "1504": 5.32574,
+ "1505": 5.00836,
+ "1506": 5.14423,
+ "1507": 5.16501,
+ "1508": 5.16864,
+ "1509": 5.3204,
+ "1510": 5.02703,
+ "1511": 5.1198,
+ "1512": 4.98354,
+ "1513": 5.1699,
+ "1514": 5.33407,
+ "1515": 5.36306,
+ "1516": 5.27572,
+ "1517": 5.2256,
+ "1518": 5.02899,
+ "1519": 5.29833,
+ "1520": 5.13757,
+ "1521": 5.15715,
+ "1522": 5.33462,
+ "1523": 5.24144,
+ "1524": 5.06791,
+ "1525": 5.20708,
+ "1526": 5.27861,
+ "1527": 5.25864,
+ "1528": 5.2395,
+ "1529": 5.18253,
+ "1530": 5.23913,
+ "1531": 5.09996,
+ "1532": 5.15679,
+ "1533": 5.05231,
+ "1534": 5.21917,
+ "1535": 5.16769,
+ "1536": 5.102,
+ "1537": 5.0318,
+ "1538": 4.91991,
+ "1539": 5.2394,
+ "1540": 5.11391,
+ "1541": 5.25502,
+ "1542": 5.23775,
+ "1543": 5.05438,
+ "1544": 5.08156,
+ "1545": 5.11794,
+ "1546": 5.32713,
+ "1547": 5.10763,
+ "1548": 5.23418,
+ "1549": 5.23089,
+ "1550": 4.97536,
+ "1551": 5.25942,
+ "1552": 5.0226,
+ "1553": 5.14887,
+ "1554": 5.11051,
+ "1555": 5.11223,
+ "1556": 5.19882,
+ "1557": 5.08844,
+ "1558": 5.22982,
+ "1559": 5.00137,
+ "1560": 5.11269,
+ "1561": 5.14639,
+ "1562": 5.18443,
+ "1563": 5.24639,
+ "1564": 5.26429,
+ "1565": 5.08809,
+ "1566": 5.29393,
+ "1567": 5.04372,
+ "1568": 5.08304,
+ "1569": 5.2002,
+ "1570": 5.17168,
+ "1571": 4.95228,
+ "1572": 5.04524,
+ "1573": 5.02748,
+ "1574": 4.99831,
+ "1575": 5.23124,
+ "1576": 5.20891,
+ "1577": 5.12722,
+ "1578": 5.36355,
+ "1579": 4.94343,
+ "1580": 5.12556,
+ "1581": 5.09739,
+ "1582": 5.28014,
+ "1583": 5.04619,
+ "1584": 5.0566,
+ "1585": 5.11727,
+ "1586": 5.30646,
+ "1587": 5.13281,
+ "1588": 5.22351,
+ "1589": 4.83814,
+ "1590": 5.09825,
+ "1591": 5.18082,
+ "1592": 5.14078,
+ "1593": 5.23646,
+ "1594": 5.11532,
+ "1595": 5.10761,
+ "1596": 5.19194,
+ "1597": 5.11362,
+ "1598": 5.16252,
+ "1599": 5.18865,
+ "1600": 4.86676,
+ "1601": 5.11898,
+ "1602": 5.22827,
+ "1603": 5.19524,
+ "1604": 5.05797,
+ "1605": 5.03277,
+ "1606": 4.98991,
+ "1607": 5.06915,
+ "1608": 4.97927,
+ "1609": 5.07061,
+ "1610": 5.04561,
+ "1611": 4.9918,
+ "1612": 4.75806,
+ "1613": 5.03141,
+ "1614": 4.87811,
+ "1615": 5.07817,
+ "1616": 5.22549,
+ "1617": 5.06182,
+ "1618": 4.98945,
+ "1619": 5.18486,
+ "1620": 5.14429,
+ "1621": 5.31666,
+ "1622": 5.06737,
+ "1623": 5.15063,
+ "1624": 5.1305,
+ "1625": 5.12197,
+ "1626": 5.10206,
+ "1627": 5.1085,
+ "1628": 5.06234,
+ "1629": 4.93316,
+ "1630": 5.06616,
+ "1631": 5.05719,
+ "1632": 5.10145,
+ "1633": 4.97087,
+ "1634": 4.92194,
+ "1635": 5.05013,
+ "1636": 4.9202,
+ "1637": 5.22863,
+ "1638": 5.15783,
+ "1639": 4.9808,
+ "1640": 5.00716,
+ "1641": 5.12367,
+ "1642": 5.0869,
+ "1643": 5.05029,
+ "1644": 5.12283,
+ "1645": 4.96415,
+ "1646": 5.12257,
+ "1647": 5.03267,
+ "1648": 5.1903,
+ "1649": 4.92263,
+ "1650": 5.0596,
+ "1651": 4.93391,
+ "1652": 5.21143,
+ "1653": 5.1587,
+ "1654": 5.13384,
+ "1655": 5.16235,
+ "1656": 5.34793,
+ "1657": 5.21074,
+ "1658": 5.04155,
+ "1659": 4.92889,
+ "1660": 4.8117,
+ "1661": 5.02968,
+ "1662": 5.14515,
+ "1663": 5.15868,
+ "1664": 4.98471,
+ "1665": 5.11027,
+ "1666": 5.10315,
+ "1667": 4.84929,
+ "1668": 5.10956,
+ "1669": 5.07311,
+ "1670": 5.11152,
+ "1671": 5.16545,
+ "1672": 4.77709,
+ "1673": 5.03502,
+ "1674": 4.91572,
+ "1675": 5.04406,
+ "1676": 5.0023,
+ "1677": 4.80013,
+ "1678": 5.02745,
+ "1679": 4.88908,
+ "1680": 5.03791,
+ "1681": 5.06371,
+ "1682": 5.03586,
+ "1683": 4.90255,
+ "1684": 5.06133,
+ "1685": 5.13096,
+ "1686": 5.075,
+ "1687": 4.97679,
+ "1688": 5.17279,
+ "1689": 5.1507,
+ "1690": 4.99681,
+ "1691": 4.99961,
+ "1692": 4.91412,
+ "1693": 5.02305,
+ "1694": 4.94741,
+ "1695": 4.91895,
+ "1696": 5.0846,
+ "1697": 5.05067,
+ "1698": 4.95116,
+ "1699": 5.00638,
+ "1700": 4.94576,
+ "1701": 5.16681,
+ "1702": 5.07316,
+ "1703": 5.16582,
+ "1704": 5.14235,
+ "1705": 4.96408,
+ "1706": 4.98303,
+ "1707": 4.78833,
+ "1708": 5.03283,
+ "1709": 5.2281,
+ "1710": 5.02918,
+ "1711": 5.18873,
+ "1712": 5.19088,
+ "1713": 5.03631,
+ "1714": 5.04689,
+ "1715": 4.91662,
+ "1716": 4.93663,
+ "1717": 4.86445,
+ "1718": 5.02654,
+ "1719": 5.12575,
+ "1720": 5.02353,
+ "1721": 4.9343,
+ "1722": 5.06572,
+ "1723": 4.93302,
+ "1724": 5.03906,
+ "1725": 5.19169,
+ "1726": 5.06497,
+ "1727": 4.91076,
+ "1728": 5.01922,
+ "1729": 5.04885,
+ "1730": 4.91107,
+ "1731": 5.00108,
+ "1732": 4.91468,
+ "1733": 5.12873,
+ "1734": 4.83023,
+ "1735": 5.21293,
+ "1736": 4.91729,
+ "1737": 4.86164,
+ "1738": 4.97933,
+ "1739": 5.16149,
+ "1740": 4.84041,
+ "1741": 4.78298,
+ "1742": 4.91062,
+ "1743": 5.09353,
+ "1744": 4.98531,
+ "1745": 4.82544,
+ "1746": 4.94973,
+ "1747": 4.86843,
+ "1748": 5.06696,
+ "1749": 4.86793,
+ "1750": 5.01333,
+ "1751": 5.12023,
+ "1752": 4.90813,
+ "1753": 5.09204,
+ "1754": 5.05813,
+ "1755": 4.89777,
+ "1756": 5.02216,
+ "1757": 5.14157,
+ "1758": 4.87188,
+ "1759": 4.94434,
+ "1760": 4.83222,
+ "1761": 5.02427,
+ "1762": 4.81507,
+ "1763": 4.77391,
+ "1764": 4.93175,
+ "1765": 5.14727,
+ "1766": 5.33614,
+ "1767": 5.22331,
+ "1768": 4.94712,
+ "1769": 5.0043,
+ "1770": 4.98512,
+ "1771": 4.96473,
+ "1772": 4.98299,
+ "1773": 4.97266,
+ "1774": 4.87138,
+ "1775": 4.9493,
+ "1776": 4.9958,
+ "1777": 4.94665,
+ "1778": 4.99288,
+ "1779": 5.08212,
+ "1780": 4.83608,
+ "1781": 5.05478,
+ "1782": 4.99549,
+ "1783": 5.01236,
+ "1784": 4.93254,
+ "1785": 5.16842,
+ "1786": 4.80892,
+ "1787": 4.9699,
+ "1788": 4.82948,
+ "1789": 4.88554,
+ "1790": 4.80386,
+ "1791": 4.74542,
+ "1792": 4.87988,
+ "1793": 5.11081,
+ "1794": 4.98659,
+ "1795": 4.97147,
+ "1796": 5.00354,
+ "1797": 4.79101,
+ "1798": 4.77029,
+ "1799": 5.01913,
+ "1800": 4.91155,
+ "1801": 5.04891,
+ "1802": 4.82591,
+ "1803": 4.95313,
+ "1804": 4.88492,
+ "1805": 4.90634,
+ "1806": 4.88167,
+ "1807": 4.92894,
+ "1808": 4.92469,
+ "1809": 5.15028,
+ "1810": 5.09708,
+ "1811": 4.96325,
+ "1812": 4.8059,
+ "1813": 5.1023,
+ "1814": 4.7819,
+ "1815": 4.86518,
+ "1816": 5.05104,
+ "1817": 4.79238,
+ "1818": 4.80401,
+ "1819": 5.02672,
+ "1820": 4.68884,
+ "1821": 5.02319,
+ "1822": 4.66224,
+ "1823": 4.86936,
+ "1824": 4.7914,
+ "1825": 5.06607,
+ "1826": 4.81841,
+ "1827": 4.79544,
+ "1828": 4.9506,
+ "1829": 5.10848,
+ "1830": 4.9163,
+ "1831": 4.89965,
+ "1832": 4.83328,
+ "1833": 4.78854,
+ "1834": 4.94794,
+ "1835": 4.96175,
+ "1836": 4.91339,
+ "1837": 4.6762,
+ "1838": 4.80703,
+ "1839": 4.89949,
+ "1840": 4.91213,
+ "1841": 4.84083,
+ "1842": 4.9567,
+ "1843": 4.71182,
+ "1844": 4.6194,
+ "1845": 5.00584,
+ "1846": 4.75435,
+ "1847": 4.86491,
+ "1848": 4.9035,
+ "1849": 4.85124,
+ "1850": 4.87005,
+ "1851": 5.01617,
+ "1852": 4.97859,
+ "1853": 4.82821,
+ "1854": 4.86426,
+ "1855": 4.82455,
+ "1856": 4.75214,
+ "1857": 4.96641,
+ "1858": 4.96711,
+ "1859": 4.7484,
+ "1860": 4.86558,
+ "1861": 5.21257,
+ "1862": 4.61253,
+ "1863": 4.83567,
+ "1864": 4.74748,
+ "1865": 4.86472,
+ "1866": 4.78934,
+ "1867": 5.00307,
+ "1868": 4.72073,
+ "1869": 4.76301,
+ "1870": 4.93972,
+ "1871": 5.00163,
+ "1872": 4.68713,
+ "1873": 4.70038,
+ "1874": 4.85131,
+ "1875": 4.85367,
+ "1876": 4.74378,
+ "1877": 4.80696,
+ "1878": 4.8139,
+ "1879": 4.82462,
+ "1880": 4.89248,
+ "1881": 4.79379,
+ "1882": 4.79882,
+ "1883": 4.78556,
+ "1884": 4.97714,
+ "1885": 4.92363,
+ "1886": 4.82454,
+ "1887": 4.82091,
+ "1888": 4.97246,
+ "1889": 4.96553,
+ "1890": 4.71236,
+ "1891": 4.65764,
+ "1892": 4.85277,
+ "1893": 4.65022,
+ "1894": 4.90165,
+ "1895": 4.79,
+ "1896": 4.66068,
+ "1897": 4.79617,
+ "1898": 4.92161,
+ "1899": 4.77736,
+ "1900": 4.91325,
+ "1901": 4.84998,
+ "1902": 4.787,
+ "1903": 4.76372,
+ "1904": 4.65638,
+ "1905": 4.55077,
+ "1906": 4.81577,
+ "1907": 4.9106,
+ "1908": 5.03029,
+ "1909": 4.89294,
+ "1910": 4.7884,
+ "1911": 4.81269,
+ "1912": 4.653,
+ "1913": 4.95098,
+ "1914": 4.88806,
+ "1915": 4.86687,
+ "1916": 4.9302,
+ "1917": 4.85504,
+ "1918": 4.87427,
+ "1919": 4.99557,
+ "1920": 4.77001,
+ "1921": 4.88729,
+ "1922": 4.8196,
+ "1923": 4.75752,
+ "1924": 4.8297,
+ "1925": 5.05687,
+ "1926": 4.94229,
+ "1927": 4.93308,
+ "1928": 4.92739,
+ "1929": 4.93147,
+ "1930": 4.917,
+ "1931": 4.77692,
+ "1932": 4.86743,
+ "1933": 4.83532,
+ "1934": 4.84373,
+ "1935": 5.11279,
+ "1936": 4.88728,
+ "1937": 4.8824,
+ "1938": 4.80623,
+ "1939": 4.70831,
+ "1940": 4.83067,
+ "1941": 4.74224,
+ "1942": 4.87785,
+ "1943": 4.74082,
+ "1944": 4.7536,
+ "1945": 4.69017,
+ "1946": 4.91953,
+ "1947": 4.87613,
+ "1948": 4.60452,
+ "1949": 4.89888,
+ "1950": 4.79826,
+ "1951": 4.9677,
+ "1952": 4.73855,
+ "1953": 4.79852,
+ "1954": 4.7398,
+ "1955": 4.85209,
+ "1956": 4.88278,
+ "1957": 4.73599,
+ "1958": 4.70215,
+ "1959": 4.76471,
+ "1960": 4.76967,
+ "1961": 4.71471,
+ "1962": 4.83443,
+ "1963": 4.82459,
+ "1964": 4.85019,
+ "1965": 4.87867,
+ "1966": 4.79219,
+ "1967": 4.60013,
+ "1968": 4.83399,
+ "1969": 4.59632,
+ "1970": 4.58346,
+ "1971": 4.90585,
+ "1972": 4.89941,
+ "1973": 4.55559,
+ "1974": 4.8295,
+ "1975": 4.83261,
+ "1976": 4.71818,
+ "1977": 4.58171,
+ "1978": 5.00781,
+ "1979": 4.6663,
+ "1980": 4.74961,
+ "1981": 4.87741,
+ "1982": 4.72647,
+ "1983": 4.89363,
+ "1984": 4.64954,
+ "1985": 4.78941,
+ "1986": 4.70195,
+ "1987": 4.8185,
+ "1988": 4.89272,
+ "1989": 4.63799,
+ "1990": 4.79789,
+ "1991": 4.70399,
+ "1992": 4.80349,
+ "1993": 4.74121,
+ "1994": 4.85611,
+ "1995": 4.5595,
+ "1996": 4.65792,
+ "1997": 4.8133,
+ "1998": 4.68041,
+ "1999": 4.73244,
+ "2000": 4.6301
+ }
+ },
+ "num-zeros": {
+ "start_step": 1,
+ "end_step": 2000,
+ "step_interval": 1,
+ "values": {
+ "1": 26.0,
+ "2": 32.0,
+ "3": 38.0,
+ "4": 33.0,
+ "5": 32.0,
+ "6": 30.0,
+ "7": 33.0,
+ "8": 34.0,
+ "9": 40.0,
+ "10": 31.0,
+ "11": 26.0,
+ "12": 33.0,
+ "13": 28.0,
+ "14": 29.0,
+ "15": 28.0,
+ "16": 27.0,
+ "17": 32.0,
+ "18": 28.0,
+ "19": 31.0,
+ "20": 39.0,
+ "21": 22.0,
+ "22": 29.0,
+ "23": 39.0,
+ "24": 35.0,
+ "25": 31.0,
+ "26": 40.0,
+ "27": 39.0,
+ "28": 42.0,
+ "29": 53.0,
+ "30": 51.0,
+ "31": 48.0,
+ "32": 51.0,
+ "33": 38.0,
+ "34": 48.0,
+ "35": 47.0,
+ "36": 49.0,
+ "37": 42.0,
+ "38": 43.0,
+ "39": 52.0,
+ "40": 55.0,
+ "41": 39.0,
+ "42": 54.0,
+ "43": 57.0,
+ "44": 53.0,
+ "45": 46.0,
+ "46": 61.0,
+ "47": 52.0,
+ "48": 54.0,
+ "49": 64.0,
+ "50": 64.0,
+ "51": 42.0,
+ "52": 55.0,
+ "53": 48.0,
+ "54": 71.0,
+ "55": 56.0,
+ "56": 74.0,
+ "57": 70.0,
+ "58": 57.0,
+ "59": 53.0,
+ "60": 67.0,
+ "61": 63.0,
+ "62": 59.0,
+ "63": 66.0,
+ "64": 70.0,
+ "65": 59.0,
+ "66": 74.0,
+ "67": 81.0,
+ "68": 74.0,
+ "69": 60.0,
+ "70": 60.0,
+ "71": 66.0,
+ "72": 75.0,
+ "73": 67.0,
+ "74": 63.0,
+ "75": 60.0,
+ "76": 60.0,
+ "77": 78.0,
+ "78": 78.0,
+ "79": 58.0,
+ "80": 63.0,
+ "81": 63.0,
+ "82": 50.0,
+ "83": 63.0,
+ "84": 72.0,
+ "85": 69.0,
+ "86": 80.0,
+ "87": 70.0,
+ "88": 68.0,
+ "89": 69.0,
+ "90": 63.0,
+ "91": 58.0,
+ "92": 87.0,
+ "93": 65.0,
+ "94": 50.0,
+ "95": 67.0,
+ "96": 71.0,
+ "97": 70.0,
+ "98": 81.0,
+ "99": 66.0,
+ "100": 76.0,
+ "101": 67.0,
+ "102": 44.0,
+ "103": 60.0,
+ "104": 68.0,
+ "105": 84.0,
+ "106": 61.0,
+ "107": 76.0,
+ "108": 68.0,
+ "109": 76.0,
+ "110": 74.0,
+ "111": 75.0,
+ "112": 78.0,
+ "113": 58.0,
+ "114": 66.0,
+ "115": 71.0,
+ "116": 63.0,
+ "117": 74.0,
+ "118": 52.0,
+ "119": 74.0,
+ "120": 52.0,
+ "121": 76.0,
+ "122": 66.0,
+ "123": 81.0,
+ "124": 76.0,
+ "125": 87.0,
+ "126": 49.0,
+ "127": 56.0,
+ "128": 78.0,
+ "129": 53.0,
+ "130": 76.0,
+ "131": 86.0,
+ "132": 61.0,
+ "133": 72.0,
+ "134": 62.0,
+ "135": 59.0,
+ "136": 60.0,
+ "137": 57.0,
+ "138": 81.0,
+ "139": 74.0,
+ "140": 59.0,
+ "141": 50.0,
+ "142": 64.0,
+ "143": 54.0,
+ "144": 49.0,
+ "145": 57.0,
+ "146": 51.0,
+ "147": 49.0,
+ "148": 69.0,
+ "149": 49.0,
+ "150": 66.0,
+ "151": 57.0,
+ "152": 51.0,
+ "153": 61.0,
+ "154": 58.0,
+ "155": 68.0,
+ "156": 68.0,
+ "157": 51.0,
+ "158": 68.0,
+ "159": 60.0,
+ "160": 64.0,
+ "161": 66.0,
+ "162": 75.0,
+ "163": 40.0,
+ "164": 84.0,
+ "165": 50.0,
+ "166": 68.0,
+ "167": 54.0,
+ "168": 58.0,
+ "169": 65.0,
+ "170": 71.0,
+ "171": 54.0,
+ "172": 64.0,
+ "173": 81.0,
+ "174": 55.0,
+ "175": 63.0,
+ "176": 69.0,
+ "177": 80.0,
+ "178": 68.0,
+ "179": 69.0,
+ "180": 64.0,
+ "181": 41.0,
+ "182": 63.0,
+ "183": 66.0,
+ "184": 67.0,
+ "185": 77.0,
+ "186": 77.0,
+ "187": 61.0,
+ "188": 62.0,
+ "189": 50.0,
+ "190": 57.0,
+ "191": 60.0,
+ "192": 67.0,
+ "193": 70.0,
+ "194": 72.0,
+ "195": 60.0,
+ "196": 81.0,
+ "197": 56.0,
+ "198": 47.0,
+ "199": 50.0,
+ "200": 86.0,
+ "201": 52.0,
+ "202": 64.0,
+ "203": 58.0,
+ "204": 63.0,
+ "205": 40.0,
+ "206": 72.0,
+ "207": 50.0,
+ "208": 42.0,
+ "209": 69.0,
+ "210": 68.0,
+ "211": 56.0,
+ "212": 64.0,
+ "213": 60.0,
+ "214": 62.0,
+ "215": 66.0,
+ "216": 58.0,
+ "217": 59.0,
+ "218": 70.0,
+ "219": 80.0,
+ "220": 81.0,
+ "221": 51.0,
+ "222": 57.0,
+ "223": 67.0,
+ "224": 53.0,
+ "225": 61.0,
+ "226": 68.0,
+ "227": 76.0,
+ "228": 59.0,
+ "229": 44.0,
+ "230": 50.0,
+ "231": 58.0,
+ "232": 65.0,
+ "233": 90.0,
+ "234": 60.0,
+ "235": 98.0,
+ "236": 49.0,
+ "237": 92.0,
+ "238": 71.0,
+ "239": 68.0,
+ "240": 79.0,
+ "241": 67.0,
+ "242": 75.0,
+ "243": 66.0,
+ "244": 59.0,
+ "245": 81.0,
+ "246": 80.0,
+ "247": 88.0,
+ "248": 81.0,
+ "249": 79.0,
+ "250": 80.0,
+ "251": 74.0,
+ "252": 72.0,
+ "253": 57.0,
+ "254": 67.0,
+ "255": 79.0,
+ "256": 86.0,
+ "257": 66.0,
+ "258": 94.0,
+ "259": 69.0,
+ "260": 70.0,
+ "261": 64.0,
+ "262": 77.0,
+ "263": 74.0,
+ "264": 70.0,
+ "265": 68.0,
+ "266": 67.0,
+ "267": 66.0,
+ "268": 59.0,
+ "269": 73.0,
+ "270": 85.0,
+ "271": 67.0,
+ "272": 81.0,
+ "273": 71.0,
+ "274": 69.0,
+ "275": 72.0,
+ "276": 72.0,
+ "277": 82.0,
+ "278": 61.0,
+ "279": 94.0,
+ "280": 56.0,
+ "281": 55.0,
+ "282": 73.0,
+ "283": 90.0,
+ "284": 85.0,
+ "285": 49.0,
+ "286": 50.0,
+ "287": 90.0,
+ "288": 71.0,
+ "289": 85.0,
+ "290": 75.0,
+ "291": 88.0,
+ "292": 88.0,
+ "293": 91.0,
+ "294": 84.0,
+ "295": 85.0,
+ "296": 102.0,
+ "297": 70.0,
+ "298": 65.0,
+ "299": 80.0,
+ "300": 80.0,
+ "301": 91.0,
+ "302": 94.0,
+ "303": 71.0,
+ "304": 74.0,
+ "305": 59.0,
+ "306": 72.0,
+ "307": 73.0,
+ "308": 91.0,
+ "309": 88.0,
+ "310": 82.0,
+ "311": 84.0,
+ "312": 73.0,
+ "313": 97.0,
+ "314": 74.0,
+ "315": 69.0,
+ "316": 96.0,
+ "317": 61.0,
+ "318": 99.0,
+ "319": 67.0,
+ "320": 77.0,
+ "321": 86.0,
+ "322": 70.0,
+ "323": 86.0,
+ "324": 96.0,
+ "325": 74.0,
+ "326": 97.0,
+ "327": 73.0,
+ "328": 99.0,
+ "329": 93.0,
+ "330": 96.0,
+ "331": 81.0,
+ "332": 79.0,
+ "333": 97.0,
+ "334": 81.0,
+ "335": 84.0,
+ "336": 81.0,
+ "337": 99.0,
+ "338": 89.0,
+ "339": 93.0,
+ "340": 101.0,
+ "341": 93.0,
+ "342": 57.0,
+ "343": 81.0,
+ "344": 105.0,
+ "345": 88.0,
+ "346": 85.0,
+ "347": 91.0,
+ "348": 82.0,
+ "349": 78.0,
+ "350": 101.0,
+ "351": 105.0,
+ "352": 76.0,
+ "353": 112.0,
+ "354": 72.0,
+ "355": 79.0,
+ "356": 104.0,
+ "357": 86.0,
+ "358": 77.0,
+ "359": 99.0,
+ "360": 102.0,
+ "361": 64.0,
+ "362": 123.0,
+ "363": 96.0,
+ "364": 95.0,
+ "365": 85.0,
+ "366": 82.0,
+ "367": 84.0,
+ "368": 83.0,
+ "369": 77.0,
+ "370": 118.0,
+ "371": 76.0,
+ "372": 77.0,
+ "373": 96.0,
+ "374": 68.0,
+ "375": 92.0,
+ "376": 84.0,
+ "377": 98.0,
+ "378": 99.0,
+ "379": 108.0,
+ "380": 96.0,
+ "381": 92.0,
+ "382": 75.0,
+ "383": 89.0,
+ "384": 100.0,
+ "385": 73.0,
+ "386": 85.0,
+ "387": 73.0,
+ "388": 93.0,
+ "389": 88.0,
+ "390": 90.0,
+ "391": 115.0,
+ "392": 88.0,
+ "393": 99.0,
+ "394": 104.0,
+ "395": 125.0,
+ "396": 80.0,
+ "397": 78.0,
+ "398": 67.0,
+ "399": 104.0,
+ "400": 96.0,
+ "401": 105.0,
+ "402": 88.0,
+ "403": 97.0,
+ "404": 101.0,
+ "405": 85.0,
+ "406": 114.0,
+ "407": 76.0,
+ "408": 98.0,
+ "409": 84.0,
+ "410": 102.0,
+ "411": 81.0,
+ "412": 56.0,
+ "413": 68.0,
+ "414": 90.0,
+ "415": 95.0,
+ "416": 93.0,
+ "417": 90.0,
+ "418": 60.0,
+ "419": 86.0,
+ "420": 76.0,
+ "421": 110.0,
+ "422": 89.0,
+ "423": 78.0,
+ "424": 82.0,
+ "425": 94.0,
+ "426": 80.0,
+ "427": 96.0,
+ "428": 86.0,
+ "429": 92.0,
+ "430": 84.0,
+ "431": 87.0,
+ "432": 80.0,
+ "433": 81.0,
+ "434": 93.0,
+ "435": 83.0,
+ "436": 82.0,
+ "437": 91.0,
+ "438": 62.0,
+ "439": 72.0,
+ "440": 79.0,
+ "441": 87.0,
+ "442": 106.0,
+ "443": 106.0,
+ "444": 58.0,
+ "445": 93.0,
+ "446": 89.0,
+ "447": 97.0,
+ "448": 79.0,
+ "449": 90.0,
+ "450": 83.0,
+ "451": 63.0,
+ "452": 70.0,
+ "453": 63.0,
+ "454": 80.0,
+ "455": 114.0,
+ "456": 98.0,
+ "457": 101.0,
+ "458": 70.0,
+ "459": 69.0,
+ "460": 65.0,
+ "461": 115.0,
+ "462": 63.0,
+ "463": 73.0,
+ "464": 69.0,
+ "465": 95.0,
+ "466": 76.0,
+ "467": 77.0,
+ "468": 90.0,
+ "469": 65.0,
+ "470": 91.0,
+ "471": 76.0,
+ "472": 60.0,
+ "473": 94.0,
+ "474": 69.0,
+ "475": 90.0,
+ "476": 66.0,
+ "477": 75.0,
+ "478": 78.0,
+ "479": 63.0,
+ "480": 73.0,
+ "481": 80.0,
+ "482": 77.0,
+ "483": 78.0,
+ "484": 84.0,
+ "485": 70.0,
+ "486": 84.0,
+ "487": 69.0,
+ "488": 88.0,
+ "489": 77.0,
+ "490": 59.0,
+ "491": 83.0,
+ "492": 57.0,
+ "493": 83.0,
+ "494": 69.0,
+ "495": 50.0,
+ "496": 56.0,
+ "497": 97.0,
+ "498": 77.0,
+ "499": 75.0,
+ "500": 60.0,
+ "501": 64.0,
+ "502": 64.0,
+ "503": 71.0,
+ "504": 77.0,
+ "505": 68.0,
+ "506": 65.0,
+ "507": 80.0,
+ "508": 42.0,
+ "509": 63.0,
+ "510": 77.0,
+ "511": 81.0,
+ "512": 57.0,
+ "513": 61.0,
+ "514": 60.0,
+ "515": 71.0,
+ "516": 61.0,
+ "517": 85.0,
+ "518": 43.0,
+ "519": 72.0,
+ "520": 82.0,
+ "521": 50.0,
+ "522": 58.0,
+ "523": 74.0,
+ "524": 70.0,
+ "525": 82.0,
+ "526": 60.0,
+ "527": 71.0,
+ "528": 63.0,
+ "529": 66.0,
+ "530": 67.0,
+ "531": 69.0,
+ "532": 72.0,
+ "533": 81.0,
+ "534": 62.0,
+ "535": 66.0,
+ "536": 61.0,
+ "537": 60.0,
+ "538": 55.0,
+ "539": 62.0,
+ "540": 63.0,
+ "541": 61.0,
+ "542": 61.0,
+ "543": 55.0,
+ "544": 64.0,
+ "545": 73.0,
+ "546": 77.0,
+ "547": 69.0,
+ "548": 75.0,
+ "549": 61.0,
+ "550": 61.0,
+ "551": 63.0,
+ "552": 71.0,
+ "553": 78.0,
+ "554": 67.0,
+ "555": 65.0,
+ "556": 74.0,
+ "557": 61.0,
+ "558": 62.0,
+ "559": 62.0,
+ "560": 71.0,
+ "561": 56.0,
+ "562": 65.0,
+ "563": 77.0,
+ "564": 67.0,
+ "565": 55.0,
+ "566": 58.0,
+ "567": 42.0,
+ "568": 70.0,
+ "569": 56.0,
+ "570": 60.0,
+ "571": 58.0,
+ "572": 41.0,
+ "573": 71.0,
+ "574": 69.0,
+ "575": 85.0,
+ "576": 44.0,
+ "577": 50.0,
+ "578": 69.0,
+ "579": 62.0,
+ "580": 67.0,
+ "581": 59.0,
+ "582": 58.0,
+ "583": 55.0,
+ "584": 47.0,
+ "585": 60.0,
+ "586": 41.0,
+ "587": 47.0,
+ "588": 53.0,
+ "589": 55.0,
+ "590": 46.0,
+ "591": 69.0,
+ "592": 50.0,
+ "593": 52.0,
+ "594": 56.0,
+ "595": 47.0,
+ "596": 44.0,
+ "597": 33.0,
+ "598": 61.0,
+ "599": 50.0,
+ "600": 88.0,
+ "601": 55.0,
+ "602": 64.0,
+ "603": 60.0,
+ "604": 57.0,
+ "605": 57.0,
+ "606": 45.0,
+ "607": 54.0,
+ "608": 45.0,
+ "609": 40.0,
+ "610": 45.0,
+ "611": 53.0,
+ "612": 52.0,
+ "613": 73.0,
+ "614": 53.0,
+ "615": 52.0,
+ "616": 64.0,
+ "617": 44.0,
+ "618": 59.0,
+ "619": 50.0,
+ "620": 72.0,
+ "621": 50.0,
+ "622": 58.0,
+ "623": 57.0,
+ "624": 56.0,
+ "625": 56.0,
+ "626": 71.0,
+ "627": 50.0,
+ "628": 49.0,
+ "629": 50.0,
+ "630": 50.0,
+ "631": 40.0,
+ "632": 45.0,
+ "633": 42.0,
+ "634": 38.0,
+ "635": 51.0,
+ "636": 36.0,
+ "637": 55.0,
+ "638": 45.0,
+ "639": 63.0,
+ "640": 52.0,
+ "641": 51.0,
+ "642": 52.0,
+ "643": 49.0,
+ "644": 51.0,
+ "645": 57.0,
+ "646": 57.0,
+ "647": 69.0,
+ "648": 60.0,
+ "649": 49.0,
+ "650": 49.0,
+ "651": 66.0,
+ "652": 49.0,
+ "653": 59.0,
+ "654": 42.0,
+ "655": 42.0,
+ "656": 46.0,
+ "657": 49.0,
+ "658": 50.0,
+ "659": 44.0,
+ "660": 53.0,
+ "661": 46.0,
+ "662": 60.0,
+ "663": 43.0,
+ "664": 61.0,
+ "665": 37.0,
+ "666": 30.0,
+ "667": 42.0,
+ "668": 41.0,
+ "669": 44.0,
+ "670": 44.0,
+ "671": 59.0,
+ "672": 53.0,
+ "673": 47.0,
+ "674": 42.0,
+ "675": 54.0,
+ "676": 43.0,
+ "677": 68.0,
+ "678": 41.0,
+ "679": 38.0,
+ "680": 46.0,
+ "681": 50.0,
+ "682": 33.0,
+ "683": 38.0,
+ "684": 52.0,
+ "685": 40.0,
+ "686": 43.0,
+ "687": 61.0,
+ "688": 57.0,
+ "689": 51.0,
+ "690": 35.0,
+ "691": 45.0,
+ "692": 55.0,
+ "693": 36.0,
+ "694": 50.0,
+ "695": 50.0,
+ "696": 51.0,
+ "697": 41.0,
+ "698": 37.0,
+ "699": 47.0,
+ "700": 42.0,
+ "701": 37.0,
+ "702": 33.0,
+ "703": 39.0,
+ "704": 43.0,
+ "705": 45.0,
+ "706": 32.0,
+ "707": 38.0,
+ "708": 38.0,
+ "709": 46.0,
+ "710": 35.0,
+ "711": 48.0,
+ "712": 35.0,
+ "713": 48.0,
+ "714": 37.0,
+ "715": 48.0,
+ "716": 36.0,
+ "717": 34.0,
+ "718": 26.0,
+ "719": 36.0,
+ "720": 34.0,
+ "721": 36.0,
+ "722": 35.0,
+ "723": 29.0,
+ "724": 47.0,
+ "725": 32.0,
+ "726": 39.0,
+ "727": 40.0,
+ "728": 39.0,
+ "729": 47.0,
+ "730": 36.0,
+ "731": 48.0,
+ "732": 43.0,
+ "733": 39.0,
+ "734": 51.0,
+ "735": 40.0,
+ "736": 49.0,
+ "737": 44.0,
+ "738": 27.0,
+ "739": 46.0,
+ "740": 38.0,
+ "741": 38.0,
+ "742": 45.0,
+ "743": 44.0,
+ "744": 52.0,
+ "745": 48.0,
+ "746": 50.0,
+ "747": 53.0,
+ "748": 52.0,
+ "749": 48.0,
+ "750": 46.0,
+ "751": 40.0,
+ "752": 50.0,
+ "753": 44.0,
+ "754": 43.0,
+ "755": 48.0,
+ "756": 38.0,
+ "757": 45.0,
+ "758": 40.0,
+ "759": 56.0,
+ "760": 46.0,
+ "761": 44.0,
+ "762": 48.0,
+ "763": 54.0,
+ "764": 49.0,
+ "765": 42.0,
+ "766": 57.0,
+ "767": 45.0,
+ "768": 51.0,
+ "769": 60.0,
+ "770": 51.0,
+ "771": 31.0,
+ "772": 41.0,
+ "773": 60.0,
+ "774": 37.0,
+ "775": 43.0,
+ "776": 37.0,
+ "777": 34.0,
+ "778": 42.0,
+ "779": 37.0,
+ "780": 34.0,
+ "781": 41.0,
+ "782": 25.0,
+ "783": 30.0,
+ "784": 39.0,
+ "785": 34.0,
+ "786": 38.0,
+ "787": 47.0,
+ "788": 41.0,
+ "789": 50.0,
+ "790": 44.0,
+ "791": 34.0,
+ "792": 38.0,
+ "793": 53.0,
+ "794": 45.0,
+ "795": 52.0,
+ "796": 39.0,
+ "797": 41.0,
+ "798": 39.0,
+ "799": 44.0,
+ "800": 46.0,
+ "801": 44.0,
+ "802": 40.0,
+ "803": 47.0,
+ "804": 34.0,
+ "805": 45.0,
+ "806": 43.0,
+ "807": 46.0,
+ "808": 36.0,
+ "809": 35.0,
+ "810": 35.0,
+ "811": 44.0,
+ "812": 47.0,
+ "813": 41.0,
+ "814": 36.0,
+ "815": 41.0,
+ "816": 52.0,
+ "817": 43.0,
+ "818": 35.0,
+ "819": 52.0,
+ "820": 40.0,
+ "821": 29.0,
+ "822": 34.0,
+ "823": 44.0,
+ "824": 47.0,
+ "825": 36.0,
+ "826": 40.0,
+ "827": 29.0,
+ "828": 35.0,
+ "829": 32.0,
+ "830": 30.0,
+ "831": 36.0,
+ "832": 34.0,
+ "833": 39.0,
+ "834": 50.0,
+ "835": 38.0,
+ "836": 37.0,
+ "837": 50.0,
+ "838": 45.0,
+ "839": 52.0,
+ "840": 37.0,
+ "841": 35.0,
+ "842": 30.0,
+ "843": 50.0,
+ "844": 23.0,
+ "845": 45.0,
+ "846": 25.0,
+ "847": 32.0,
+ "848": 25.0,
+ "849": 34.0,
+ "850": 39.0,
+ "851": 46.0,
+ "852": 41.0,
+ "853": 43.0,
+ "854": 45.0,
+ "855": 27.0,
+ "856": 47.0,
+ "857": 47.0,
+ "858": 46.0,
+ "859": 35.0,
+ "860": 45.0,
+ "861": 30.0,
+ "862": 39.0,
+ "863": 21.0,
+ "864": 26.0,
+ "865": 46.0,
+ "866": 44.0,
+ "867": 48.0,
+ "868": 27.0,
+ "869": 42.0,
+ "870": 45.0,
+ "871": 33.0,
+ "872": 49.0,
+ "873": 32.0,
+ "874": 56.0,
+ "875": 38.0,
+ "876": 41.0,
+ "877": 40.0,
+ "878": 37.0,
+ "879": 22.0,
+ "880": 39.0,
+ "881": 40.0,
+ "882": 49.0,
+ "883": 39.0,
+ "884": 35.0,
+ "885": 32.0,
+ "886": 45.0,
+ "887": 41.0,
+ "888": 34.0,
+ "889": 35.0,
+ "890": 37.0,
+ "891": 41.0,
+ "892": 42.0,
+ "893": 42.0,
+ "894": 34.0,
+ "895": 38.0,
+ "896": 37.0,
+ "897": 41.0,
+ "898": 33.0,
+ "899": 35.0,
+ "900": 39.0,
+ "901": 37.0,
+ "902": 39.0,
+ "903": 42.0,
+ "904": 38.0,
+ "905": 32.0,
+ "906": 34.0,
+ "907": 38.0,
+ "908": 39.0,
+ "909": 52.0,
+ "910": 34.0,
+ "911": 26.0,
+ "912": 46.0,
+ "913": 40.0,
+ "914": 48.0,
+ "915": 25.0,
+ "916": 49.0,
+ "917": 36.0,
+ "918": 31.0,
+ "919": 26.0,
+ "920": 40.0,
+ "921": 34.0,
+ "922": 38.0,
+ "923": 41.0,
+ "924": 24.0,
+ "925": 27.0,
+ "926": 43.0,
+ "927": 31.0,
+ "928": 40.0,
+ "929": 32.0,
+ "930": 42.0,
+ "931": 33.0,
+ "932": 34.0,
+ "933": 38.0,
+ "934": 41.0,
+ "935": 26.0,
+ "936": 44.0,
+ "937": 36.0,
+ "938": 37.0,
+ "939": 28.0,
+ "940": 33.0,
+ "941": 34.0,
+ "942": 31.0,
+ "943": 26.0,
+ "944": 37.0,
+ "945": 29.0,
+ "946": 31.0,
+ "947": 34.0,
+ "948": 41.0,
+ "949": 31.0,
+ "950": 35.0,
+ "951": 31.0,
+ "952": 38.0,
+ "953": 47.0,
+ "954": 43.0,
+ "955": 46.0,
+ "956": 35.0,
+ "957": 40.0,
+ "958": 37.0,
+ "959": 52.0,
+ "960": 35.0,
+ "961": 38.0,
+ "962": 41.0,
+ "963": 45.0,
+ "964": 43.0,
+ "965": 51.0,
+ "966": 38.0,
+ "967": 31.0,
+ "968": 32.0,
+ "969": 35.0,
+ "970": 48.0,
+ "971": 38.0,
+ "972": 43.0,
+ "973": 38.0,
+ "974": 40.0,
+ "975": 43.0,
+ "976": 29.0,
+ "977": 44.0,
+ "978": 31.0,
+ "979": 43.0,
+ "980": 39.0,
+ "981": 33.0,
+ "982": 30.0,
+ "983": 54.0,
+ "984": 43.0,
+ "985": 48.0,
+ "986": 40.0,
+ "987": 30.0,
+ "988": 38.0,
+ "989": 38.0,
+ "990": 42.0,
+ "991": 36.0,
+ "992": 48.0,
+ "993": 47.0,
+ "994": 50.0,
+ "995": 35.0,
+ "996": 29.0,
+ "997": 51.0,
+ "998": 42.0,
+ "999": 35.0,
+ "1000": 28.0,
+ "1001": 23.0,
+ "1002": 35.0,
+ "1003": 39.0,
+ "1004": 46.0,
+ "1005": 42.0,
+ "1006": 27.0,
+ "1007": 44.0,
+ "1008": 32.0,
+ "1009": 34.0,
+ "1010": 29.0,
+ "1011": 31.0,
+ "1012": 28.0,
+ "1013": 37.0,
+ "1014": 29.0,
+ "1015": 39.0,
+ "1016": 31.0,
+ "1017": 37.0,
+ "1018": 46.0,
+ "1019": 26.0,
+ "1020": 34.0,
+ "1021": 30.0,
+ "1022": 46.0,
+ "1023": 38.0,
+ "1024": 49.0,
+ "1025": 41.0,
+ "1026": 55.0,
+ "1027": 37.0,
+ "1028": 29.0,
+ "1029": 38.0,
+ "1030": 35.0,
+ "1031": 41.0,
+ "1032": 42.0,
+ "1033": 27.0,
+ "1034": 29.0,
+ "1035": 32.0,
+ "1036": 25.0,
+ "1037": 34.0,
+ "1038": 32.0,
+ "1039": 31.0,
+ "1040": 30.0,
+ "1041": 24.0,
+ "1042": 20.0,
+ "1043": 26.0,
+ "1044": 44.0,
+ "1045": 37.0,
+ "1046": 34.0,
+ "1047": 27.0,
+ "1048": 36.0,
+ "1049": 42.0,
+ "1050": 37.0,
+ "1051": 40.0,
+ "1052": 40.0,
+ "1053": 32.0,
+ "1054": 37.0,
+ "1055": 31.0,
+ "1056": 36.0,
+ "1057": 37.0,
+ "1058": 37.0,
+ "1059": 35.0,
+ "1060": 32.0,
+ "1061": 37.0,
+ "1062": 45.0,
+ "1063": 38.0,
+ "1064": 42.0,
+ "1065": 35.0,
+ "1066": 36.0,
+ "1067": 29.0,
+ "1068": 30.0,
+ "1069": 30.0,
+ "1070": 39.0,
+ "1071": 33.0,
+ "1072": 36.0,
+ "1073": 41.0,
+ "1074": 47.0,
+ "1075": 36.0,
+ "1076": 39.0,
+ "1077": 45.0,
+ "1078": 32.0,
+ "1079": 46.0,
+ "1080": 43.0,
+ "1081": 40.0,
+ "1082": 42.0,
+ "1083": 42.0,
+ "1084": 42.0,
+ "1085": 38.0,
+ "1086": 42.0,
+ "1087": 36.0,
+ "1088": 31.0,
+ "1089": 42.0,
+ "1090": 28.0,
+ "1091": 36.0,
+ "1092": 35.0,
+ "1093": 36.0,
+ "1094": 41.0,
+ "1095": 37.0,
+ "1096": 48.0,
+ "1097": 33.0,
+ "1098": 24.0,
+ "1099": 43.0,
+ "1100": 41.0,
+ "1101": 38.0,
+ "1102": 39.0,
+ "1103": 29.0,
+ "1104": 33.0,
+ "1105": 38.0,
+ "1106": 37.0,
+ "1107": 30.0,
+ "1108": 41.0,
+ "1109": 41.0,
+ "1110": 42.0,
+ "1111": 43.0,
+ "1112": 25.0,
+ "1113": 40.0,
+ "1114": 32.0,
+ "1115": 34.0,
+ "1116": 45.0,
+ "1117": 40.0,
+ "1118": 39.0,
+ "1119": 31.0,
+ "1120": 28.0,
+ "1121": 28.0,
+ "1122": 28.0,
+ "1123": 43.0,
+ "1124": 34.0,
+ "1125": 26.0,
+ "1126": 33.0,
+ "1127": 31.0,
+ "1128": 33.0,
+ "1129": 43.0,
+ "1130": 43.0,
+ "1131": 40.0,
+ "1132": 42.0,
+ "1133": 34.0,
+ "1134": 32.0,
+ "1135": 29.0,
+ "1136": 36.0,
+ "1137": 42.0,
+ "1138": 34.0,
+ "1139": 31.0,
+ "1140": 38.0,
+ "1141": 37.0,
+ "1142": 38.0,
+ "1143": 44.0,
+ "1144": 40.0,
+ "1145": 39.0,
+ "1146": 42.0,
+ "1147": 35.0,
+ "1148": 29.0,
+ "1149": 40.0,
+ "1150": 34.0,
+ "1151": 27.0,
+ "1152": 22.0,
+ "1153": 36.0,
+ "1154": 31.0,
+ "1155": 41.0,
+ "1156": 26.0,
+ "1157": 33.0,
+ "1158": 35.0,
+ "1159": 36.0,
+ "1160": 41.0,
+ "1161": 40.0,
+ "1162": 48.0,
+ "1163": 37.0,
+ "1164": 43.0,
+ "1165": 34.0,
+ "1166": 30.0,
+ "1167": 34.0,
+ "1168": 31.0,
+ "1169": 41.0,
+ "1170": 27.0,
+ "1171": 40.0,
+ "1172": 34.0,
+ "1173": 23.0,
+ "1174": 40.0,
+ "1175": 30.0,
+ "1176": 50.0,
+ "1177": 39.0,
+ "1178": 33.0,
+ "1179": 42.0,
+ "1180": 31.0,
+ "1181": 30.0,
+ "1182": 38.0,
+ "1183": 37.0,
+ "1184": 35.0,
+ "1185": 31.0,
+ "1186": 29.0,
+ "1187": 39.0,
+ "1188": 34.0,
+ "1189": 48.0,
+ "1190": 32.0,
+ "1191": 41.0,
+ "1192": 45.0,
+ "1193": 28.0,
+ "1194": 46.0,
+ "1195": 34.0,
+ "1196": 38.0,
+ "1197": 51.0,
+ "1198": 36.0,
+ "1199": 40.0,
+ "1200": 29.0,
+ "1201": 37.0,
+ "1202": 32.0,
+ "1203": 35.0,
+ "1204": 37.0,
+ "1205": 56.0,
+ "1206": 40.0,
+ "1207": 36.0,
+ "1208": 41.0,
+ "1209": 31.0,
+ "1210": 39.0,
+ "1211": 46.0,
+ "1212": 45.0,
+ "1213": 57.0,
+ "1214": 31.0,
+ "1215": 33.0,
+ "1216": 31.0,
+ "1217": 34.0,
+ "1218": 42.0,
+ "1219": 45.0,
+ "1220": 37.0,
+ "1221": 44.0,
+ "1222": 32.0,
+ "1223": 35.0,
+ "1224": 34.0,
+ "1225": 45.0,
+ "1226": 28.0,
+ "1227": 34.0,
+ "1228": 27.0,
+ "1229": 23.0,
+ "1230": 25.0,
+ "1231": 14.0,
+ "1232": 36.0,
+ "1233": 39.0,
+ "1234": 37.0,
+ "1235": 32.0,
+ "1236": 41.0,
+ "1237": 30.0,
+ "1238": 36.0,
+ "1239": 37.0,
+ "1240": 48.0,
+ "1241": 31.0,
+ "1242": 34.0,
+ "1243": 35.0,
+ "1244": 29.0,
+ "1245": 28.0,
+ "1246": 36.0,
+ "1247": 31.0,
+ "1248": 38.0,
+ "1249": 27.0,
+ "1250": 40.0,
+ "1251": 26.0,
+ "1252": 42.0,
+ "1253": 32.0,
+ "1254": 39.0,
+ "1255": 46.0,
+ "1256": 41.0,
+ "1257": 30.0,
+ "1258": 44.0,
+ "1259": 32.0,
+ "1260": 25.0,
+ "1261": 42.0,
+ "1262": 36.0,
+ "1263": 34.0,
+ "1264": 32.0,
+ "1265": 35.0,
+ "1266": 34.0,
+ "1267": 38.0,
+ "1268": 43.0,
+ "1269": 30.0,
+ "1270": 28.0,
+ "1271": 42.0,
+ "1272": 32.0,
+ "1273": 40.0,
+ "1274": 44.0,
+ "1275": 38.0,
+ "1276": 31.0,
+ "1277": 54.0,
+ "1278": 46.0,
+ "1279": 44.0,
+ "1280": 34.0,
+ "1281": 26.0,
+ "1282": 37.0,
+ "1283": 32.0,
+ "1284": 43.0,
+ "1285": 43.0,
+ "1286": 36.0,
+ "1287": 46.0,
+ "1288": 33.0,
+ "1289": 43.0,
+ "1290": 37.0,
+ "1291": 42.0,
+ "1292": 38.0,
+ "1293": 43.0,
+ "1294": 30.0,
+ "1295": 34.0,
+ "1296": 31.0,
+ "1297": 26.0,
+ "1298": 38.0,
+ "1299": 40.0,
+ "1300": 32.0,
+ "1301": 43.0,
+ "1302": 35.0,
+ "1303": 35.0,
+ "1304": 41.0,
+ "1305": 30.0,
+ "1306": 28.0,
+ "1307": 34.0,
+ "1308": 32.0,
+ "1309": 36.0,
+ "1310": 29.0,
+ "1311": 43.0,
+ "1312": 32.0,
+ "1313": 37.0,
+ "1314": 35.0,
+ "1315": 33.0,
+ "1316": 37.0,
+ "1317": 33.0,
+ "1318": 41.0,
+ "1319": 28.0,
+ "1320": 42.0,
+ "1321": 30.0,
+ "1322": 21.0,
+ "1323": 28.0,
+ "1324": 40.0,
+ "1325": 36.0,
+ "1326": 43.0,
+ "1327": 32.0,
+ "1328": 35.0,
+ "1329": 33.0,
+ "1330": 27.0,
+ "1331": 30.0,
+ "1332": 36.0,
+ "1333": 45.0,
+ "1334": 32.0,
+ "1335": 41.0,
+ "1336": 38.0,
+ "1337": 37.0,
+ "1338": 38.0,
+ "1339": 27.0,
+ "1340": 33.0,
+ "1341": 47.0,
+ "1342": 24.0,
+ "1343": 27.0,
+ "1344": 34.0,
+ "1345": 34.0,
+ "1346": 21.0,
+ "1347": 33.0,
+ "1348": 33.0,
+ "1349": 42.0,
+ "1350": 30.0,
+ "1351": 39.0,
+ "1352": 26.0,
+ "1353": 36.0,
+ "1354": 40.0,
+ "1355": 31.0,
+ "1356": 46.0,
+ "1357": 46.0,
+ "1358": 29.0,
+ "1359": 29.0,
+ "1360": 30.0,
+ "1361": 35.0,
+ "1362": 40.0,
+ "1363": 33.0,
+ "1364": 36.0,
+ "1365": 34.0,
+ "1366": 47.0,
+ "1367": 31.0,
+ "1368": 37.0,
+ "1369": 28.0,
+ "1370": 41.0,
+ "1371": 30.0,
+ "1372": 42.0,
+ "1373": 44.0,
+ "1374": 34.0,
+ "1375": 22.0,
+ "1376": 47.0,
+ "1377": 29.0,
+ "1378": 39.0,
+ "1379": 49.0,
+ "1380": 44.0,
+ "1381": 30.0,
+ "1382": 45.0,
+ "1383": 44.0,
+ "1384": 31.0,
+ "1385": 35.0,
+ "1386": 31.0,
+ "1387": 31.0,
+ "1388": 22.0,
+ "1389": 32.0,
+ "1390": 38.0,
+ "1391": 42.0,
+ "1392": 34.0,
+ "1393": 43.0,
+ "1394": 33.0,
+ "1395": 39.0,
+ "1396": 37.0,
+ "1397": 27.0,
+ "1398": 33.0,
+ "1399": 29.0,
+ "1400": 36.0,
+ "1401": 28.0,
+ "1402": 27.0,
+ "1403": 23.0,
+ "1404": 28.0,
+ "1405": 36.0,
+ "1406": 29.0,
+ "1407": 36.0,
+ "1408": 43.0,
+ "1409": 37.0,
+ "1410": 37.0,
+ "1411": 38.0,
+ "1412": 28.0,
+ "1413": 48.0,
+ "1414": 34.0,
+ "1415": 42.0,
+ "1416": 35.0,
+ "1417": 34.0,
+ "1418": 43.0,
+ "1419": 38.0,
+ "1420": 33.0,
+ "1421": 33.0,
+ "1422": 53.0,
+ "1423": 22.0,
+ "1424": 35.0,
+ "1425": 43.0,
+ "1426": 36.0,
+ "1427": 43.0,
+ "1428": 31.0,
+ "1429": 30.0,
+ "1430": 36.0,
+ "1431": 29.0,
+ "1432": 37.0,
+ "1433": 32.0,
+ "1434": 47.0,
+ "1435": 38.0,
+ "1436": 40.0,
+ "1437": 47.0,
+ "1438": 28.0,
+ "1439": 33.0,
+ "1440": 25.0,
+ "1441": 35.0,
+ "1442": 38.0,
+ "1443": 42.0,
+ "1444": 28.0,
+ "1445": 34.0,
+ "1446": 28.0,
+ "1447": 39.0,
+ "1448": 45.0,
+ "1449": 41.0,
+ "1450": 25.0,
+ "1451": 38.0,
+ "1452": 27.0,
+ "1453": 28.0,
+ "1454": 28.0,
+ "1455": 32.0,
+ "1456": 40.0,
+ "1457": 33.0,
+ "1458": 37.0,
+ "1459": 41.0,
+ "1460": 31.0,
+ "1461": 34.0,
+ "1462": 23.0,
+ "1463": 33.0,
+ "1464": 42.0,
+ "1465": 42.0,
+ "1466": 29.0,
+ "1467": 27.0,
+ "1468": 41.0,
+ "1469": 30.0,
+ "1470": 35.0,
+ "1471": 32.0,
+ "1472": 44.0,
+ "1473": 53.0,
+ "1474": 28.0,
+ "1475": 25.0,
+ "1476": 47.0,
+ "1477": 40.0,
+ "1478": 26.0,
+ "1479": 33.0,
+ "1480": 33.0,
+ "1481": 33.0,
+ "1482": 33.0,
+ "1483": 31.0,
+ "1484": 31.0,
+ "1485": 45.0,
+ "1486": 37.0,
+ "1487": 32.0,
+ "1488": 26.0,
+ "1489": 45.0,
+ "1490": 40.0,
+ "1491": 44.0,
+ "1492": 44.0,
+ "1493": 44.0,
+ "1494": 33.0,
+ "1495": 42.0,
+ "1496": 32.0,
+ "1497": 39.0,
+ "1498": 32.0,
+ "1499": 42.0,
+ "1500": 42.0,
+ "1501": 46.0,
+ "1502": 46.0,
+ "1503": 39.0,
+ "1504": 31.0,
+ "1505": 47.0,
+ "1506": 41.0,
+ "1507": 35.0,
+ "1508": 39.0,
+ "1509": 32.0,
+ "1510": 37.0,
+ "1511": 52.0,
+ "1512": 29.0,
+ "1513": 46.0,
+ "1514": 40.0,
+ "1515": 41.0,
+ "1516": 31.0,
+ "1517": 39.0,
+ "1518": 40.0,
+ "1519": 32.0,
+ "1520": 34.0,
+ "1521": 44.0,
+ "1522": 53.0,
+ "1523": 40.0,
+ "1524": 39.0,
+ "1525": 30.0,
+ "1526": 34.0,
+ "1527": 19.0,
+ "1528": 40.0,
+ "1529": 30.0,
+ "1530": 38.0,
+ "1531": 28.0,
+ "1532": 30.0,
+ "1533": 43.0,
+ "1534": 34.0,
+ "1535": 35.0,
+ "1536": 34.0,
+ "1537": 33.0,
+ "1538": 36.0,
+ "1539": 32.0,
+ "1540": 38.0,
+ "1541": 35.0,
+ "1542": 50.0,
+ "1543": 50.0,
+ "1544": 38.0,
+ "1545": 38.0,
+ "1546": 35.0,
+ "1547": 31.0,
+ "1548": 39.0,
+ "1549": 36.0,
+ "1550": 30.0,
+ "1551": 42.0,
+ "1552": 49.0,
+ "1553": 46.0,
+ "1554": 41.0,
+ "1555": 25.0,
+ "1556": 33.0,
+ "1557": 46.0,
+ "1558": 43.0,
+ "1559": 36.0,
+ "1560": 30.0,
+ "1561": 48.0,
+ "1562": 30.0,
+ "1563": 38.0,
+ "1564": 40.0,
+ "1565": 30.0,
+ "1566": 34.0,
+ "1567": 36.0,
+ "1568": 43.0,
+ "1569": 35.0,
+ "1570": 43.0,
+ "1571": 32.0,
+ "1572": 34.0,
+ "1573": 35.0,
+ "1574": 31.0,
+ "1575": 39.0,
+ "1576": 30.0,
+ "1577": 41.0,
+ "1578": 46.0,
+ "1579": 35.0,
+ "1580": 39.0,
+ "1581": 43.0,
+ "1582": 30.0,
+ "1583": 43.0,
+ "1584": 36.0,
+ "1585": 37.0,
+ "1586": 44.0,
+ "1587": 37.0,
+ "1588": 43.0,
+ "1589": 41.0,
+ "1590": 46.0,
+ "1591": 32.0,
+ "1592": 37.0,
+ "1593": 32.0,
+ "1594": 36.0,
+ "1595": 27.0,
+ "1596": 40.0,
+ "1597": 36.0,
+ "1598": 36.0,
+ "1599": 32.0,
+ "1600": 41.0,
+ "1601": 34.0,
+ "1602": 38.0,
+ "1603": 48.0,
+ "1604": 29.0,
+ "1605": 42.0,
+ "1606": 33.0,
+ "1607": 41.0,
+ "1608": 40.0,
+ "1609": 42.0,
+ "1610": 37.0,
+ "1611": 35.0,
+ "1612": 37.0,
+ "1613": 39.0,
+ "1614": 51.0,
+ "1615": 38.0,
+ "1616": 33.0,
+ "1617": 45.0,
+ "1618": 43.0,
+ "1619": 32.0,
+ "1620": 43.0,
+ "1621": 47.0,
+ "1622": 36.0,
+ "1623": 50.0,
+ "1624": 40.0,
+ "1625": 33.0,
+ "1626": 39.0,
+ "1627": 34.0,
+ "1628": 40.0,
+ "1629": 30.0,
+ "1630": 34.0,
+ "1631": 45.0,
+ "1632": 39.0,
+ "1633": 40.0,
+ "1634": 30.0,
+ "1635": 53.0,
+ "1636": 31.0,
+ "1637": 35.0,
+ "1638": 39.0,
+ "1639": 42.0,
+ "1640": 37.0,
+ "1641": 43.0,
+ "1642": 30.0,
+ "1643": 43.0,
+ "1644": 36.0,
+ "1645": 37.0,
+ "1646": 61.0,
+ "1647": 34.0,
+ "1648": 41.0,
+ "1649": 39.0,
+ "1650": 42.0,
+ "1651": 33.0,
+ "1652": 45.0,
+ "1653": 25.0,
+ "1654": 36.0,
+ "1655": 29.0,
+ "1656": 45.0,
+ "1657": 37.0,
+ "1658": 46.0,
+ "1659": 38.0,
+ "1660": 46.0,
+ "1661": 41.0,
+ "1662": 35.0,
+ "1663": 35.0,
+ "1664": 37.0,
+ "1665": 30.0,
+ "1666": 44.0,
+ "1667": 45.0,
+ "1668": 40.0,
+ "1669": 35.0,
+ "1670": 35.0,
+ "1671": 37.0,
+ "1672": 32.0,
+ "1673": 48.0,
+ "1674": 41.0,
+ "1675": 40.0,
+ "1676": 49.0,
+ "1677": 35.0,
+ "1678": 30.0,
+ "1679": 45.0,
+ "1680": 40.0,
+ "1681": 32.0,
+ "1682": 32.0,
+ "1683": 42.0,
+ "1684": 44.0,
+ "1685": 47.0,
+ "1686": 30.0,
+ "1687": 31.0,
+ "1688": 31.0,
+ "1689": 40.0,
+ "1690": 43.0,
+ "1691": 36.0,
+ "1692": 31.0,
+ "1693": 31.0,
+ "1694": 35.0,
+ "1695": 41.0,
+ "1696": 32.0,
+ "1697": 27.0,
+ "1698": 39.0,
+ "1699": 41.0,
+ "1700": 31.0,
+ "1701": 35.0,
+ "1702": 31.0,
+ "1703": 40.0,
+ "1704": 36.0,
+ "1705": 36.0,
+ "1706": 46.0,
+ "1707": 26.0,
+ "1708": 37.0,
+ "1709": 37.0,
+ "1710": 39.0,
+ "1711": 32.0,
+ "1712": 46.0,
+ "1713": 44.0,
+ "1714": 45.0,
+ "1715": 43.0,
+ "1716": 30.0,
+ "1717": 41.0,
+ "1718": 43.0,
+ "1719": 28.0,
+ "1720": 36.0,
+ "1721": 26.0,
+ "1722": 42.0,
+ "1723": 42.0,
+ "1724": 39.0,
+ "1725": 28.0,
+ "1726": 46.0,
+ "1727": 43.0,
+ "1728": 40.0,
+ "1729": 44.0,
+ "1730": 38.0,
+ "1731": 26.0,
+ "1732": 39.0,
+ "1733": 44.0,
+ "1734": 39.0,
+ "1735": 34.0,
+ "1736": 46.0,
+ "1737": 46.0,
+ "1738": 34.0,
+ "1739": 47.0,
+ "1740": 44.0,
+ "1741": 31.0,
+ "1742": 46.0,
+ "1743": 43.0,
+ "1744": 46.0,
+ "1745": 53.0,
+ "1746": 42.0,
+ "1747": 37.0,
+ "1748": 37.0,
+ "1749": 47.0,
+ "1750": 46.0,
+ "1751": 43.0,
+ "1752": 35.0,
+ "1753": 41.0,
+ "1754": 40.0,
+ "1755": 32.0,
+ "1756": 36.0,
+ "1757": 48.0,
+ "1758": 34.0,
+ "1759": 49.0,
+ "1760": 46.0,
+ "1761": 36.0,
+ "1762": 34.0,
+ "1763": 36.0,
+ "1764": 39.0,
+ "1765": 24.0,
+ "1766": 46.0,
+ "1767": 46.0,
+ "1768": 36.0,
+ "1769": 56.0,
+ "1770": 28.0,
+ "1771": 42.0,
+ "1772": 52.0,
+ "1773": 45.0,
+ "1774": 37.0,
+ "1775": 33.0,
+ "1776": 43.0,
+ "1777": 54.0,
+ "1778": 39.0,
+ "1779": 33.0,
+ "1780": 39.0,
+ "1781": 45.0,
+ "1782": 35.0,
+ "1783": 43.0,
+ "1784": 53.0,
+ "1785": 36.0,
+ "1786": 38.0,
+ "1787": 43.0,
+ "1788": 45.0,
+ "1789": 33.0,
+ "1790": 42.0,
+ "1791": 44.0,
+ "1792": 34.0,
+ "1793": 30.0,
+ "1794": 40.0,
+ "1795": 55.0,
+ "1796": 33.0,
+ "1797": 30.0,
+ "1798": 41.0,
+ "1799": 37.0,
+ "1800": 41.0,
+ "1801": 40.0,
+ "1802": 30.0,
+ "1803": 36.0,
+ "1804": 41.0,
+ "1805": 34.0,
+ "1806": 39.0,
+ "1807": 36.0,
+ "1808": 43.0,
+ "1809": 45.0,
+ "1810": 41.0,
+ "1811": 28.0,
+ "1812": 33.0,
+ "1813": 30.0,
+ "1814": 36.0,
+ "1815": 35.0,
+ "1816": 35.0,
+ "1817": 35.0,
+ "1818": 42.0,
+ "1819": 25.0,
+ "1820": 38.0,
+ "1821": 48.0,
+ "1822": 38.0,
+ "1823": 38.0,
+ "1824": 49.0,
+ "1825": 46.0,
+ "1826": 32.0,
+ "1827": 47.0,
+ "1828": 30.0,
+ "1829": 50.0,
+ "1830": 43.0,
+ "1831": 36.0,
+ "1832": 47.0,
+ "1833": 42.0,
+ "1834": 41.0,
+ "1835": 39.0,
+ "1836": 39.0,
+ "1837": 34.0,
+ "1838": 50.0,
+ "1839": 35.0,
+ "1840": 41.0,
+ "1841": 30.0,
+ "1842": 34.0,
+ "1843": 44.0,
+ "1844": 38.0,
+ "1845": 41.0,
+ "1846": 32.0,
+ "1847": 32.0,
+ "1848": 36.0,
+ "1849": 45.0,
+ "1850": 40.0,
+ "1851": 36.0,
+ "1852": 41.0,
+ "1853": 29.0,
+ "1854": 35.0,
+ "1855": 45.0,
+ "1856": 39.0,
+ "1857": 33.0,
+ "1858": 40.0,
+ "1859": 40.0,
+ "1860": 48.0,
+ "1861": 37.0,
+ "1862": 46.0,
+ "1863": 47.0,
+ "1864": 48.0,
+ "1865": 38.0,
+ "1866": 51.0,
+ "1867": 34.0,
+ "1868": 40.0,
+ "1869": 42.0,
+ "1870": 38.0,
+ "1871": 36.0,
+ "1872": 42.0,
+ "1873": 42.0,
+ "1874": 38.0,
+ "1875": 51.0,
+ "1876": 39.0,
+ "1877": 41.0,
+ "1878": 26.0,
+ "1879": 33.0,
+ "1880": 41.0,
+ "1881": 50.0,
+ "1882": 37.0,
+ "1883": 45.0,
+ "1884": 39.0,
+ "1885": 37.0,
+ "1886": 32.0,
+ "1887": 36.0,
+ "1888": 28.0,
+ "1889": 38.0,
+ "1890": 37.0,
+ "1891": 51.0,
+ "1892": 44.0,
+ "1893": 50.0,
+ "1894": 44.0,
+ "1895": 35.0,
+ "1896": 34.0,
+ "1897": 35.0,
+ "1898": 31.0,
+ "1899": 39.0,
+ "1900": 40.0,
+ "1901": 52.0,
+ "1902": 31.0,
+ "1903": 44.0,
+ "1904": 45.0,
+ "1905": 32.0,
+ "1906": 49.0,
+ "1907": 34.0,
+ "1908": 33.0,
+ "1909": 34.0,
+ "1910": 45.0,
+ "1911": 41.0,
+ "1912": 46.0,
+ "1913": 46.0,
+ "1914": 51.0,
+ "1915": 35.0,
+ "1916": 42.0,
+ "1917": 40.0,
+ "1918": 32.0,
+ "1919": 54.0,
+ "1920": 41.0,
+ "1921": 40.0,
+ "1922": 36.0,
+ "1923": 34.0,
+ "1924": 43.0,
+ "1925": 47.0,
+ "1926": 42.0,
+ "1927": 37.0,
+ "1928": 40.0,
+ "1929": 40.0,
+ "1930": 39.0,
+ "1931": 37.0,
+ "1932": 40.0,
+ "1933": 46.0,
+ "1934": 30.0,
+ "1935": 50.0,
+ "1936": 51.0,
+ "1937": 34.0,
+ "1938": 38.0,
+ "1939": 44.0,
+ "1940": 35.0,
+ "1941": 39.0,
+ "1942": 59.0,
+ "1943": 42.0,
+ "1944": 46.0,
+ "1945": 36.0,
+ "1946": 43.0,
+ "1947": 39.0,
+ "1948": 39.0,
+ "1949": 31.0,
+ "1950": 36.0,
+ "1951": 41.0,
+ "1952": 37.0,
+ "1953": 26.0,
+ "1954": 43.0,
+ "1955": 33.0,
+ "1956": 37.0,
+ "1957": 48.0,
+ "1958": 35.0,
+ "1959": 44.0,
+ "1960": 35.0,
+ "1961": 28.0,
+ "1962": 51.0,
+ "1963": 47.0,
+ "1964": 33.0,
+ "1965": 56.0,
+ "1966": 46.0,
+ "1967": 33.0,
+ "1968": 53.0,
+ "1969": 36.0,
+ "1970": 47.0,
+ "1971": 35.0,
+ "1972": 34.0,
+ "1973": 38.0,
+ "1974": 46.0,
+ "1975": 32.0,
+ "1976": 43.0,
+ "1977": 38.0,
+ "1978": 43.0,
+ "1979": 49.0,
+ "1980": 32.0,
+ "1981": 30.0,
+ "1982": 55.0,
+ "1983": 41.0,
+ "1984": 62.0,
+ "1985": 41.0,
+ "1986": 48.0,
+ "1987": 48.0,
+ "1988": 41.0,
+ "1989": 50.0,
+ "1990": 53.0,
+ "1991": 45.0,
+ "1992": 46.0,
+ "1993": 60.0,
+ "1994": 30.0,
+ "1995": 41.0,
+ "1996": 51.0,
+ "1997": 41.0,
+ "1998": 45.0,
+ "1999": 32.0,
+ "2000": 43.0
+ }
+ },
+ "mem-allocated-bytes": {
+ "start_step": 1,
+ "end_step": 2000,
+ "step_interval": 1,
+ "values": {
+ "1": 302618112.0,
+ "2": 302618112.0,
+ "3": 302618112.0,
+ "4": 302618112.0,
+ "5": 302618112.0,
+ "6": 302618112.0,
+ "7": 302618112.0,
+ "8": 302618112.0,
+ "9": 302618112.0,
+ "10": 302618112.0,
+ "11": 302618112.0,
+ "12": 302618112.0,
+ "13": 302618112.0,
+ "14": 302618112.0,
+ "15": 302618112.0,
+ "16": 302618112.0,
+ "17": 302618112.0,
+ "18": 302618112.0,
+ "19": 302618112.0,
+ "20": 302618112.0,
+ "21": 302618112.0,
+ "22": 302618112.0,
+ "23": 302618112.0,
+ "24": 302618112.0,
+ "25": 302618112.0,
+ "26": 302618112.0,
+ "27": 302618112.0,
+ "28": 302618112.0,
+ "29": 302618112.0,
+ "30": 302618112.0,
+ "31": 302618112.0,
+ "32": 302618112.0,
+ "33": 302618112.0,
+ "34": 302618112.0,
+ "35": 302618112.0,
+ "36": 302618112.0,
+ "37": 302618112.0,
+ "38": 302618112.0,
+ "39": 302618112.0,
+ "40": 302618112.0,
+ "41": 302618112.0,
+ "42": 302618112.0,
+ "43": 302618112.0,
+ "44": 302618112.0,
+ "45": 302618112.0,
+ "46": 302618112.0,
+ "47": 302618112.0,
+ "48": 302618112.0,
+ "49": 302618112.0,
+ "50": 302618112.0,
+ "51": 302618112.0,
+ "52": 302618112.0,
+ "53": 302618112.0,
+ "54": 302618112.0,
+ "55": 302618112.0,
+ "56": 302618112.0,
+ "57": 302618112.0,
+ "58": 302618112.0,
+ "59": 302618112.0,
+ "60": 302618112.0,
+ "61": 302618112.0,
+ "62": 302618112.0,
+ "63": 302618112.0,
+ "64": 302618112.0,
+ "65": 302618112.0,
+ "66": 302618112.0,
+ "67": 302618112.0,
+ "68": 302618112.0,
+ "69": 302618112.0,
+ "70": 302618112.0,
+ "71": 302618112.0,
+ "72": 302618112.0,
+ "73": 302618112.0,
+ "74": 302618112.0,
+ "75": 302618112.0,
+ "76": 302618112.0,
+ "77": 302618112.0,
+ "78": 302618112.0,
+ "79": 302618112.0,
+ "80": 302618112.0,
+ "81": 302618112.0,
+ "82": 302618112.0,
+ "83": 302618112.0,
+ "84": 302618112.0,
+ "85": 302618112.0,
+ "86": 302618112.0,
+ "87": 302618112.0,
+ "88": 302618112.0,
+ "89": 302618112.0,
+ "90": 302618112.0,
+ "91": 302618112.0,
+ "92": 302618112.0,
+ "93": 302618112.0,
+ "94": 302618112.0,
+ "95": 302618112.0,
+ "96": 302618112.0,
+ "97": 302618112.0,
+ "98": 302618112.0,
+ "99": 302618112.0,
+ "100": 302618112.0,
+ "101": 302618112.0,
+ "102": 302618112.0,
+ "103": 302618112.0,
+ "104": 302618112.0,
+ "105": 302618112.0,
+ "106": 302618112.0,
+ "107": 302618112.0,
+ "108": 302618112.0,
+ "109": 302618112.0,
+ "110": 302618112.0,
+ "111": 302618112.0,
+ "112": 302618112.0,
+ "113": 302618112.0,
+ "114": 302618112.0,
+ "115": 302618112.0,
+ "116": 302618112.0,
+ "117": 302618112.0,
+ "118": 302618112.0,
+ "119": 302618112.0,
+ "120": 302618112.0,
+ "121": 302618112.0,
+ "122": 302618112.0,
+ "123": 302618112.0,
+ "124": 302618112.0,
+ "125": 302618112.0,
+ "126": 302618112.0,
+ "127": 302618112.0,
+ "128": 302618112.0,
+ "129": 302618112.0,
+ "130": 302618112.0,
+ "131": 302618112.0,
+ "132": 302618112.0,
+ "133": 302618112.0,
+ "134": 302618112.0,
+ "135": 302618112.0,
+ "136": 302618112.0,
+ "137": 302618112.0,
+ "138": 302618112.0,
+ "139": 302618112.0,
+ "140": 302618112.0,
+ "141": 302618112.0,
+ "142": 302618112.0,
+ "143": 302618112.0,
+ "144": 302618112.0,
+ "145": 302618112.0,
+ "146": 302618112.0,
+ "147": 302618112.0,
+ "148": 302618112.0,
+ "149": 302618112.0,
+ "150": 302618112.0,
+ "151": 302618112.0,
+ "152": 302618112.0,
+ "153": 302618112.0,
+ "154": 302618112.0,
+ "155": 302618112.0,
+ "156": 302618112.0,
+ "157": 302618112.0,
+ "158": 302618112.0,
+ "159": 302618112.0,
+ "160": 302618112.0,
+ "161": 302618112.0,
+ "162": 302618112.0,
+ "163": 302618112.0,
+ "164": 302618112.0,
+ "165": 302618112.0,
+ "166": 302618112.0,
+ "167": 302618112.0,
+ "168": 302618112.0,
+ "169": 302618112.0,
+ "170": 302618112.0,
+ "171": 302618112.0,
+ "172": 302618112.0,
+ "173": 302618112.0,
+ "174": 302618112.0,
+ "175": 302618112.0,
+ "176": 302618112.0,
+ "177": 302618112.0,
+ "178": 302618112.0,
+ "179": 302618112.0,
+ "180": 302618112.0,
+ "181": 302618112.0,
+ "182": 302618112.0,
+ "183": 302618112.0,
+ "184": 302618112.0,
+ "185": 302618112.0,
+ "186": 302618112.0,
+ "187": 302618112.0,
+ "188": 302618112.0,
+ "189": 302618112.0,
+ "190": 302618112.0,
+ "191": 302618112.0,
+ "192": 302618112.0,
+ "193": 302618112.0,
+ "194": 302618112.0,
+ "195": 302618112.0,
+ "196": 302618112.0,
+ "197": 302618112.0,
+ "198": 302618112.0,
+ "199": 302618112.0,
+ "200": 302618112.0,
+ "201": 302618112.0,
+ "202": 302618112.0,
+ "203": 302618112.0,
+ "204": 302618112.0,
+ "205": 302618112.0,
+ "206": 302618112.0,
+ "207": 302618112.0,
+ "208": 302618112.0,
+ "209": 302618112.0,
+ "210": 302618112.0,
+ "211": 302618112.0,
+ "212": 302618112.0,
+ "213": 302618112.0,
+ "214": 302618112.0,
+ "215": 302618112.0,
+ "216": 302618112.0,
+ "217": 302618112.0,
+ "218": 302618112.0,
+ "219": 302618112.0,
+ "220": 302618112.0,
+ "221": 302618112.0,
+ "222": 302618112.0,
+ "223": 302618112.0,
+ "224": 302618112.0,
+ "225": 302618112.0,
+ "226": 302618112.0,
+ "227": 302618112.0,
+ "228": 302618112.0,
+ "229": 302618112.0,
+ "230": 302618112.0,
+ "231": 302618112.0,
+ "232": 302618112.0,
+ "233": 302618112.0,
+ "234": 302618112.0,
+ "235": 302618112.0,
+ "236": 302618112.0,
+ "237": 302618112.0,
+ "238": 302618112.0,
+ "239": 302618112.0,
+ "240": 302618112.0,
+ "241": 302618112.0,
+ "242": 302618112.0,
+ "243": 302618112.0,
+ "244": 302618112.0,
+ "245": 302618112.0,
+ "246": 302618112.0,
+ "247": 302618112.0,
+ "248": 302618112.0,
+ "249": 302618112.0,
+ "250": 302618112.0,
+ "251": 302618112.0,
+ "252": 302618112.0,
+ "253": 302618112.0,
+ "254": 302618112.0,
+ "255": 302618112.0,
+ "256": 302618112.0,
+ "257": 302618112.0,
+ "258": 302618112.0,
+ "259": 302618112.0,
+ "260": 302618112.0,
+ "261": 302618112.0,
+ "262": 302618112.0,
+ "263": 302618112.0,
+ "264": 302618112.0,
+ "265": 302618112.0,
+ "266": 302618112.0,
+ "267": 302618112.0,
+ "268": 302618112.0,
+ "269": 302618112.0,
+ "270": 302618112.0,
+ "271": 302618112.0,
+ "272": 302618112.0,
+ "273": 302618112.0,
+ "274": 302618112.0,
+ "275": 302618112.0,
+ "276": 302618112.0,
+ "277": 302618112.0,
+ "278": 302618112.0,
+ "279": 302618112.0,
+ "280": 302618112.0,
+ "281": 302618112.0,
+ "282": 302618112.0,
+ "283": 302618112.0,
+ "284": 302618112.0,
+ "285": 302618112.0,
+ "286": 302618112.0,
+ "287": 302618112.0,
+ "288": 302618112.0,
+ "289": 302618112.0,
+ "290": 302618112.0,
+ "291": 302618112.0,
+ "292": 302618112.0,
+ "293": 302618112.0,
+ "294": 302618112.0,
+ "295": 302618112.0,
+ "296": 302618112.0,
+ "297": 302618112.0,
+ "298": 302618112.0,
+ "299": 302618112.0,
+ "300": 302618112.0,
+ "301": 302618112.0,
+ "302": 302618112.0,
+ "303": 302618112.0,
+ "304": 302618112.0,
+ "305": 302618112.0,
+ "306": 302618112.0,
+ "307": 302618112.0,
+ "308": 302618112.0,
+ "309": 302618112.0,
+ "310": 302618112.0,
+ "311": 302618112.0,
+ "312": 302618112.0,
+ "313": 302618112.0,
+ "314": 302618112.0,
+ "315": 302618112.0,
+ "316": 302618112.0,
+ "317": 302618112.0,
+ "318": 302618112.0,
+ "319": 302618112.0,
+ "320": 302618112.0,
+ "321": 302618112.0,
+ "322": 302618112.0,
+ "323": 302618112.0,
+ "324": 302618112.0,
+ "325": 302618112.0,
+ "326": 302618112.0,
+ "327": 302618112.0,
+ "328": 302618112.0,
+ "329": 302618112.0,
+ "330": 302618112.0,
+ "331": 302618112.0,
+ "332": 302618112.0,
+ "333": 302618112.0,
+ "334": 302618112.0,
+ "335": 302618112.0,
+ "336": 302618112.0,
+ "337": 302618112.0,
+ "338": 302618112.0,
+ "339": 302618112.0,
+ "340": 302618112.0,
+ "341": 302618112.0,
+ "342": 302618112.0,
+ "343": 302618112.0,
+ "344": 302618112.0,
+ "345": 302618112.0,
+ "346": 302618112.0,
+ "347": 302618112.0,
+ "348": 302618112.0,
+ "349": 302618112.0,
+ "350": 302618112.0,
+ "351": 302618112.0,
+ "352": 302618112.0,
+ "353": 302618112.0,
+ "354": 302618112.0,
+ "355": 302618112.0,
+ "356": 302618112.0,
+ "357": 302618112.0,
+ "358": 302618112.0,
+ "359": 302618112.0,
+ "360": 302618112.0,
+ "361": 302618112.0,
+ "362": 302618112.0,
+ "363": 302618112.0,
+ "364": 302618112.0,
+ "365": 302618112.0,
+ "366": 302618112.0,
+ "367": 302618112.0,
+ "368": 302618112.0,
+ "369": 302618112.0,
+ "370": 302618112.0,
+ "371": 302618112.0,
+ "372": 302618112.0,
+ "373": 302618112.0,
+ "374": 302618112.0,
+ "375": 302618112.0,
+ "376": 302618112.0,
+ "377": 302618112.0,
+ "378": 302618112.0,
+ "379": 302618112.0,
+ "380": 302618112.0,
+ "381": 302618112.0,
+ "382": 302618112.0,
+ "383": 302618112.0,
+ "384": 302618112.0,
+ "385": 302618112.0,
+ "386": 302618112.0,
+ "387": 302618112.0,
+ "388": 302618112.0,
+ "389": 302618112.0,
+ "390": 302618112.0,
+ "391": 302618112.0,
+ "392": 302618112.0,
+ "393": 302618112.0,
+ "394": 302618112.0,
+ "395": 302618112.0,
+ "396": 302618112.0,
+ "397": 302618112.0,
+ "398": 302618112.0,
+ "399": 302618112.0,
+ "400": 302618112.0,
+ "401": 302618112.0,
+ "402": 302618112.0,
+ "403": 302618112.0,
+ "404": 302618112.0,
+ "405": 302618112.0,
+ "406": 302618112.0,
+ "407": 302618112.0,
+ "408": 302618112.0,
+ "409": 302618112.0,
+ "410": 302618112.0,
+ "411": 302618112.0,
+ "412": 302618112.0,
+ "413": 302618112.0,
+ "414": 302618112.0,
+ "415": 302618112.0,
+ "416": 302618112.0,
+ "417": 302618112.0,
+ "418": 302618112.0,
+ "419": 302618112.0,
+ "420": 302618112.0,
+ "421": 302618112.0,
+ "422": 302618112.0,
+ "423": 302618112.0,
+ "424": 302618112.0,
+ "425": 302618112.0,
+ "426": 302618112.0,
+ "427": 302618112.0,
+ "428": 302618112.0,
+ "429": 302618112.0,
+ "430": 302618112.0,
+ "431": 302618112.0,
+ "432": 302618112.0,
+ "433": 302618112.0,
+ "434": 302618112.0,
+ "435": 302618112.0,
+ "436": 302618112.0,
+ "437": 302618112.0,
+ "438": 302618112.0,
+ "439": 302618112.0,
+ "440": 302618112.0,
+ "441": 302618112.0,
+ "442": 302618112.0,
+ "443": 302618112.0,
+ "444": 302618112.0,
+ "445": 302618112.0,
+ "446": 302618112.0,
+ "447": 302618112.0,
+ "448": 302618112.0,
+ "449": 302618112.0,
+ "450": 302618112.0,
+ "451": 302618112.0,
+ "452": 302618112.0,
+ "453": 302618112.0,
+ "454": 302618112.0,
+ "455": 302618112.0,
+ "456": 302618112.0,
+ "457": 302618112.0,
+ "458": 302618112.0,
+ "459": 302618112.0,
+ "460": 302618112.0,
+ "461": 302618112.0,
+ "462": 302618112.0,
+ "463": 302618112.0,
+ "464": 302618112.0,
+ "465": 302618112.0,
+ "466": 302618112.0,
+ "467": 302618112.0,
+ "468": 302618112.0,
+ "469": 302618112.0,
+ "470": 302618112.0,
+ "471": 302618112.0,
+ "472": 302618112.0,
+ "473": 302618112.0,
+ "474": 302618112.0,
+ "475": 302618112.0,
+ "476": 302618112.0,
+ "477": 302618112.0,
+ "478": 302618112.0,
+ "479": 302618112.0,
+ "480": 302618112.0,
+ "481": 302618112.0,
+ "482": 302618112.0,
+ "483": 302618112.0,
+ "484": 302618112.0,
+ "485": 302618112.0,
+ "486": 302618112.0,
+ "487": 302618112.0,
+ "488": 302618112.0,
+ "489": 302618112.0,
+ "490": 302618112.0,
+ "491": 302618112.0,
+ "492": 302618112.0,
+ "493": 302618112.0,
+ "494": 302618112.0,
+ "495": 302618112.0,
+ "496": 302618112.0,
+ "497": 302618112.0,
+ "498": 302618112.0,
+ "499": 302618112.0,
+ "500": 302618112.0,
+ "501": 302618112.0,
+ "502": 302618112.0,
+ "503": 302618112.0,
+ "504": 302618112.0,
+ "505": 302618112.0,
+ "506": 302618112.0,
+ "507": 302618112.0,
+ "508": 302618112.0,
+ "509": 302618112.0,
+ "510": 302618112.0,
+ "511": 302618112.0,
+ "512": 302618112.0,
+ "513": 302618112.0,
+ "514": 302618112.0,
+ "515": 302618112.0,
+ "516": 302618112.0,
+ "517": 302618112.0,
+ "518": 302618112.0,
+ "519": 302618112.0,
+ "520": 302618112.0,
+ "521": 302618112.0,
+ "522": 302618112.0,
+ "523": 302618112.0,
+ "524": 302618112.0,
+ "525": 302618112.0,
+ "526": 302618112.0,
+ "527": 302618112.0,
+ "528": 302618112.0,
+ "529": 302618112.0,
+ "530": 302618112.0,
+ "531": 302618112.0,
+ "532": 302618112.0,
+ "533": 302618112.0,
+ "534": 302618112.0,
+ "535": 302618112.0,
+ "536": 302618112.0,
+ "537": 302618112.0,
+ "538": 302618112.0,
+ "539": 302618112.0,
+ "540": 302618112.0,
+ "541": 302618112.0,
+ "542": 302618112.0,
+ "543": 302618112.0,
+ "544": 302618112.0,
+ "545": 302618112.0,
+ "546": 302618112.0,
+ "547": 302618112.0,
+ "548": 302618112.0,
+ "549": 302618112.0,
+ "550": 302618112.0,
+ "551": 302618112.0,
+ "552": 302618112.0,
+ "553": 302618112.0,
+ "554": 302618112.0,
+ "555": 302618112.0,
+ "556": 302618112.0,
+ "557": 302618112.0,
+ "558": 302618112.0,
+ "559": 302618112.0,
+ "560": 302618112.0,
+ "561": 302618112.0,
+ "562": 302618112.0,
+ "563": 302618112.0,
+ "564": 302618112.0,
+ "565": 302618112.0,
+ "566": 302618112.0,
+ "567": 302618112.0,
+ "568": 302618112.0,
+ "569": 302618112.0,
+ "570": 302618112.0,
+ "571": 302618112.0,
+ "572": 302618112.0,
+ "573": 302618112.0,
+ "574": 302618112.0,
+ "575": 302618112.0,
+ "576": 302618112.0,
+ "577": 302618112.0,
+ "578": 302618112.0,
+ "579": 302618112.0,
+ "580": 302618112.0,
+ "581": 302618112.0,
+ "582": 302618112.0,
+ "583": 302618112.0,
+ "584": 302618112.0,
+ "585": 302618112.0,
+ "586": 302618112.0,
+ "587": 302618112.0,
+ "588": 302618112.0,
+ "589": 302618112.0,
+ "590": 302618112.0,
+ "591": 302618112.0,
+ "592": 302618112.0,
+ "593": 302618112.0,
+ "594": 302618112.0,
+ "595": 302618112.0,
+ "596": 302618112.0,
+ "597": 302618112.0,
+ "598": 302618112.0,
+ "599": 302618112.0,
+ "600": 302618112.0,
+ "601": 302618112.0,
+ "602": 302618112.0,
+ "603": 302618112.0,
+ "604": 302618112.0,
+ "605": 302618112.0,
+ "606": 302618112.0,
+ "607": 302618112.0,
+ "608": 302618112.0,
+ "609": 302618112.0,
+ "610": 302618112.0,
+ "611": 302618112.0,
+ "612": 302618112.0,
+ "613": 302618112.0,
+ "614": 302618112.0,
+ "615": 302618112.0,
+ "616": 302618112.0,
+ "617": 302618112.0,
+ "618": 302618112.0,
+ "619": 302618112.0,
+ "620": 302618112.0,
+ "621": 302618112.0,
+ "622": 302618112.0,
+ "623": 302618112.0,
+ "624": 302618112.0,
+ "625": 302618112.0,
+ "626": 302618112.0,
+ "627": 302618112.0,
+ "628": 302618112.0,
+ "629": 302618112.0,
+ "630": 302618112.0,
+ "631": 302618112.0,
+ "632": 302618112.0,
+ "633": 302618112.0,
+ "634": 302618112.0,
+ "635": 302618112.0,
+ "636": 302618112.0,
+ "637": 302618112.0,
+ "638": 302618112.0,
+ "639": 302618112.0,
+ "640": 302618112.0,
+ "641": 302618112.0,
+ "642": 302618112.0,
+ "643": 302618112.0,
+ "644": 302618112.0,
+ "645": 302618112.0,
+ "646": 302618112.0,
+ "647": 302618112.0,
+ "648": 302618112.0,
+ "649": 302618112.0,
+ "650": 302618112.0,
+ "651": 302618112.0,
+ "652": 302618112.0,
+ "653": 302618112.0,
+ "654": 302618112.0,
+ "655": 302618112.0,
+ "656": 302618112.0,
+ "657": 302618112.0,
+ "658": 302618112.0,
+ "659": 302618112.0,
+ "660": 302618112.0,
+ "661": 302618112.0,
+ "662": 302618112.0,
+ "663": 302618112.0,
+ "664": 302618112.0,
+ "665": 302618112.0,
+ "666": 302618112.0,
+ "667": 302618112.0,
+ "668": 302618112.0,
+ "669": 302618112.0,
+ "670": 302618112.0,
+ "671": 302618112.0,
+ "672": 302618112.0,
+ "673": 302618112.0,
+ "674": 302618112.0,
+ "675": 302618112.0,
+ "676": 302618112.0,
+ "677": 302618112.0,
+ "678": 302618112.0,
+ "679": 302618112.0,
+ "680": 302618112.0,
+ "681": 302618112.0,
+ "682": 302618112.0,
+ "683": 302618112.0,
+ "684": 302618112.0,
+ "685": 302618112.0,
+ "686": 302618112.0,
+ "687": 302618112.0,
+ "688": 302618112.0,
+ "689": 302618112.0,
+ "690": 302618112.0,
+ "691": 302618112.0,
+ "692": 302618112.0,
+ "693": 302618112.0,
+ "694": 302618112.0,
+ "695": 302618112.0,
+ "696": 302618112.0,
+ "697": 302618112.0,
+ "698": 302618112.0,
+ "699": 302618112.0,
+ "700": 302618112.0,
+ "701": 302618112.0,
+ "702": 302618112.0,
+ "703": 302618112.0,
+ "704": 302618112.0,
+ "705": 302618112.0,
+ "706": 302618112.0,
+ "707": 302618112.0,
+ "708": 302618112.0,
+ "709": 302618112.0,
+ "710": 302618112.0,
+ "711": 302618112.0,
+ "712": 302618112.0,
+ "713": 302618112.0,
+ "714": 302618112.0,
+ "715": 302618112.0,
+ "716": 302618112.0,
+ "717": 302618112.0,
+ "718": 302618112.0,
+ "719": 302618112.0,
+ "720": 302618112.0,
+ "721": 302618112.0,
+ "722": 302618112.0,
+ "723": 302618112.0,
+ "724": 302618112.0,
+ "725": 302618112.0,
+ "726": 302618112.0,
+ "727": 302618112.0,
+ "728": 302618112.0,
+ "729": 302618112.0,
+ "730": 302618112.0,
+ "731": 302618112.0,
+ "732": 302618112.0,
+ "733": 302618112.0,
+ "734": 302618112.0,
+ "735": 302618112.0,
+ "736": 302618112.0,
+ "737": 302618112.0,
+ "738": 302618112.0,
+ "739": 302618112.0,
+ "740": 302618112.0,
+ "741": 302618112.0,
+ "742": 302618112.0,
+ "743": 302618112.0,
+ "744": 302618112.0,
+ "745": 302618112.0,
+ "746": 302618112.0,
+ "747": 302618112.0,
+ "748": 302618112.0,
+ "749": 302618112.0,
+ "750": 302618112.0,
+ "751": 302618112.0,
+ "752": 302618112.0,
+ "753": 302618112.0,
+ "754": 302618112.0,
+ "755": 302618112.0,
+ "756": 302618112.0,
+ "757": 302618112.0,
+ "758": 302618112.0,
+ "759": 302618112.0,
+ "760": 302618112.0,
+ "761": 302618112.0,
+ "762": 302618112.0,
+ "763": 302618112.0,
+ "764": 302618112.0,
+ "765": 302618112.0,
+ "766": 302618112.0,
+ "767": 302618112.0,
+ "768": 302618112.0,
+ "769": 302618112.0,
+ "770": 302618112.0,
+ "771": 302618112.0,
+ "772": 302618112.0,
+ "773": 302618112.0,
+ "774": 302618112.0,
+ "775": 302618112.0,
+ "776": 302618112.0,
+ "777": 302618112.0,
+ "778": 302618112.0,
+ "779": 302618112.0,
+ "780": 302618112.0,
+ "781": 302618112.0,
+ "782": 302618112.0,
+ "783": 302618112.0,
+ "784": 302618112.0,
+ "785": 302618112.0,
+ "786": 302618112.0,
+ "787": 302618112.0,
+ "788": 302618112.0,
+ "789": 302618112.0,
+ "790": 302618112.0,
+ "791": 302618112.0,
+ "792": 302618112.0,
+ "793": 302618112.0,
+ "794": 302618112.0,
+ "795": 302618112.0,
+ "796": 302618112.0,
+ "797": 302618112.0,
+ "798": 302618112.0,
+ "799": 302618112.0,
+ "800": 302618112.0,
+ "801": 302618112.0,
+ "802": 302618112.0,
+ "803": 302618112.0,
+ "804": 302618112.0,
+ "805": 302618112.0,
+ "806": 302618112.0,
+ "807": 302618112.0,
+ "808": 302618112.0,
+ "809": 302618112.0,
+ "810": 302618112.0,
+ "811": 302618112.0,
+ "812": 302618112.0,
+ "813": 302618112.0,
+ "814": 302618112.0,
+ "815": 302618112.0,
+ "816": 302618112.0,
+ "817": 302618112.0,
+ "818": 302618112.0,
+ "819": 302618112.0,
+ "820": 302618112.0,
+ "821": 302618112.0,
+ "822": 302618112.0,
+ "823": 302618112.0,
+ "824": 302618112.0,
+ "825": 302618112.0,
+ "826": 302618112.0,
+ "827": 302618112.0,
+ "828": 302618112.0,
+ "829": 302618112.0,
+ "830": 302618112.0,
+ "831": 302618112.0,
+ "832": 302618112.0,
+ "833": 302618112.0,
+ "834": 302618112.0,
+ "835": 302618112.0,
+ "836": 302618112.0,
+ "837": 302618112.0,
+ "838": 302618112.0,
+ "839": 302618112.0,
+ "840": 302618112.0,
+ "841": 302618112.0,
+ "842": 302618112.0,
+ "843": 302618112.0,
+ "844": 302618112.0,
+ "845": 302618112.0,
+ "846": 302618112.0,
+ "847": 302618112.0,
+ "848": 302618112.0,
+ "849": 302618112.0,
+ "850": 302618112.0,
+ "851": 302618112.0,
+ "852": 302618112.0,
+ "853": 302618112.0,
+ "854": 302618112.0,
+ "855": 302618112.0,
+ "856": 302618112.0,
+ "857": 302618112.0,
+ "858": 302618112.0,
+ "859": 302618112.0,
+ "860": 302618112.0,
+ "861": 302618112.0,
+ "862": 302618112.0,
+ "863": 302618112.0,
+ "864": 302618112.0,
+ "865": 302618112.0,
+ "866": 302618112.0,
+ "867": 302618112.0,
+ "868": 302618112.0,
+ "869": 302618112.0,
+ "870": 302618112.0,
+ "871": 302618112.0,
+ "872": 302618112.0,
+ "873": 302618112.0,
+ "874": 302618112.0,
+ "875": 302618112.0,
+ "876": 302618112.0,
+ "877": 302618112.0,
+ "878": 302618112.0,
+ "879": 302618112.0,
+ "880": 302618112.0,
+ "881": 302618112.0,
+ "882": 302618112.0,
+ "883": 302618112.0,
+ "884": 302618112.0,
+ "885": 302618112.0,
+ "886": 302618112.0,
+ "887": 302618112.0,
+ "888": 302618112.0,
+ "889": 302618112.0,
+ "890": 302618112.0,
+ "891": 302618112.0,
+ "892": 302618112.0,
+ "893": 302618112.0,
+ "894": 302618112.0,
+ "895": 302618112.0,
+ "896": 302618112.0,
+ "897": 302618112.0,
+ "898": 302618112.0,
+ "899": 302618112.0,
+ "900": 302618112.0,
+ "901": 302618112.0,
+ "902": 302618112.0,
+ "903": 302618112.0,
+ "904": 302618112.0,
+ "905": 302618112.0,
+ "906": 302618112.0,
+ "907": 302618112.0,
+ "908": 302618112.0,
+ "909": 302618112.0,
+ "910": 302618112.0,
+ "911": 302618112.0,
+ "912": 302618112.0,
+ "913": 302618112.0,
+ "914": 302618112.0,
+ "915": 302618112.0,
+ "916": 302618112.0,
+ "917": 302618112.0,
+ "918": 302618112.0,
+ "919": 302618112.0,
+ "920": 302618112.0,
+ "921": 302618112.0,
+ "922": 302618112.0,
+ "923": 302618112.0,
+ "924": 302618112.0,
+ "925": 302618112.0,
+ "926": 302618112.0,
+ "927": 302618112.0,
+ "928": 302618112.0,
+ "929": 302618112.0,
+ "930": 302618112.0,
+ "931": 302618112.0,
+ "932": 302618112.0,
+ "933": 302618112.0,
+ "934": 302618112.0,
+ "935": 302618112.0,
+ "936": 302618112.0,
+ "937": 302618112.0,
+ "938": 302618112.0,
+ "939": 302618112.0,
+ "940": 302618112.0,
+ "941": 302618112.0,
+ "942": 302618112.0,
+ "943": 302618112.0,
+ "944": 302618112.0,
+ "945": 302618112.0,
+ "946": 302618112.0,
+ "947": 302618112.0,
+ "948": 302618112.0,
+ "949": 302618112.0,
+ "950": 302618112.0,
+ "951": 302618112.0,
+ "952": 302618112.0,
+ "953": 302618112.0,
+ "954": 302618112.0,
+ "955": 302618112.0,
+ "956": 302618112.0,
+ "957": 302618112.0,
+ "958": 302618112.0,
+ "959": 302618112.0,
+ "960": 302618112.0,
+ "961": 302618112.0,
+ "962": 302618112.0,
+ "963": 302618112.0,
+ "964": 302618112.0,
+ "965": 302618112.0,
+ "966": 302618112.0,
+ "967": 302618112.0,
+ "968": 302618112.0,
+ "969": 302618112.0,
+ "970": 302618112.0,
+ "971": 302618112.0,
+ "972": 302618112.0,
+ "973": 302618112.0,
+ "974": 302618112.0,
+ "975": 302618112.0,
+ "976": 302618112.0,
+ "977": 302618112.0,
+ "978": 302618112.0,
+ "979": 302618112.0,
+ "980": 302618112.0,
+ "981": 302618112.0,
+ "982": 302618112.0,
+ "983": 302618112.0,
+ "984": 302618112.0,
+ "985": 302618112.0,
+ "986": 302618112.0,
+ "987": 302618112.0,
+ "988": 302618112.0,
+ "989": 302618112.0,
+ "990": 302618112.0,
+ "991": 302618112.0,
+ "992": 302618112.0,
+ "993": 302618112.0,
+ "994": 302618112.0,
+ "995": 302618112.0,
+ "996": 302618112.0,
+ "997": 302618112.0,
+ "998": 302618112.0,
+ "999": 302618112.0,
+ "1000": 302618112.0,
+ "1001": 302618112.0,
+ "1002": 302618112.0,
+ "1003": 302618112.0,
+ "1004": 302618112.0,
+ "1005": 302618112.0,
+ "1006": 302618112.0,
+ "1007": 302618112.0,
+ "1008": 302618112.0,
+ "1009": 302618112.0,
+ "1010": 302618112.0,
+ "1011": 302618112.0,
+ "1012": 302618112.0,
+ "1013": 302618112.0,
+ "1014": 302618112.0,
+ "1015": 302618112.0,
+ "1016": 302618112.0,
+ "1017": 302618112.0,
+ "1018": 302618112.0,
+ "1019": 302618112.0,
+ "1020": 302618112.0,
+ "1021": 302618112.0,
+ "1022": 302618112.0,
+ "1023": 302618112.0,
+ "1024": 302618112.0,
+ "1025": 302618112.0,
+ "1026": 302618112.0,
+ "1027": 302618112.0,
+ "1028": 302618112.0,
+ "1029": 302618112.0,
+ "1030": 302618112.0,
+ "1031": 302618112.0,
+ "1032": 302618112.0,
+ "1033": 302618112.0,
+ "1034": 302618112.0,
+ "1035": 302618112.0,
+ "1036": 302618112.0,
+ "1037": 302618112.0,
+ "1038": 302618112.0,
+ "1039": 302618112.0,
+ "1040": 302618112.0,
+ "1041": 302618112.0,
+ "1042": 302618112.0,
+ "1043": 302618112.0,
+ "1044": 302618112.0,
+ "1045": 302618112.0,
+ "1046": 302618112.0,
+ "1047": 302618112.0,
+ "1048": 302618112.0,
+ "1049": 302618112.0,
+ "1050": 302618112.0,
+ "1051": 302618112.0,
+ "1052": 302618112.0,
+ "1053": 302618112.0,
+ "1054": 302618112.0,
+ "1055": 302618112.0,
+ "1056": 302618112.0,
+ "1057": 302618112.0,
+ "1058": 302618112.0,
+ "1059": 302618112.0,
+ "1060": 302618112.0,
+ "1061": 302618112.0,
+ "1062": 302618112.0,
+ "1063": 302618112.0,
+ "1064": 302618112.0,
+ "1065": 302618112.0,
+ "1066": 302618112.0,
+ "1067": 302618112.0,
+ "1068": 302618112.0,
+ "1069": 302618112.0,
+ "1070": 302618112.0,
+ "1071": 302618112.0,
+ "1072": 302618112.0,
+ "1073": 302618112.0,
+ "1074": 302618112.0,
+ "1075": 302618112.0,
+ "1076": 302618112.0,
+ "1077": 302618112.0,
+ "1078": 302618112.0,
+ "1079": 302618112.0,
+ "1080": 302618112.0,
+ "1081": 302618112.0,
+ "1082": 302618112.0,
+ "1083": 302618112.0,
+ "1084": 302618112.0,
+ "1085": 302618112.0,
+ "1086": 302618112.0,
+ "1087": 302618112.0,
+ "1088": 302618112.0,
+ "1089": 302618112.0,
+ "1090": 302618112.0,
+ "1091": 302618112.0,
+ "1092": 302618112.0,
+ "1093": 302618112.0,
+ "1094": 302618112.0,
+ "1095": 302618112.0,
+ "1096": 302618112.0,
+ "1097": 302618112.0,
+ "1098": 302618112.0,
+ "1099": 302618112.0,
+ "1100": 302618112.0,
+ "1101": 302618112.0,
+ "1102": 302618112.0,
+ "1103": 302618112.0,
+ "1104": 302618112.0,
+ "1105": 302618112.0,
+ "1106": 302618112.0,
+ "1107": 302618112.0,
+ "1108": 302618112.0,
+ "1109": 302618112.0,
+ "1110": 302618112.0,
+ "1111": 302618112.0,
+ "1112": 302618112.0,
+ "1113": 302618112.0,
+ "1114": 302618112.0,
+ "1115": 302618112.0,
+ "1116": 302618112.0,
+ "1117": 302618112.0,
+ "1118": 302618112.0,
+ "1119": 302618112.0,
+ "1120": 302618112.0,
+ "1121": 302618112.0,
+ "1122": 302618112.0,
+ "1123": 302618112.0,
+ "1124": 302618112.0,
+ "1125": 302618112.0,
+ "1126": 302618112.0,
+ "1127": 302618112.0,
+ "1128": 302618112.0,
+ "1129": 302618112.0,
+ "1130": 302618112.0,
+ "1131": 302618112.0,
+ "1132": 302618112.0,
+ "1133": 302618112.0,
+ "1134": 302618112.0,
+ "1135": 302618112.0,
+ "1136": 302618112.0,
+ "1137": 302618112.0,
+ "1138": 302618112.0,
+ "1139": 302618112.0,
+ "1140": 302618112.0,
+ "1141": 302618112.0,
+ "1142": 302618112.0,
+ "1143": 302618112.0,
+ "1144": 302618112.0,
+ "1145": 302618112.0,
+ "1146": 302618112.0,
+ "1147": 302618112.0,
+ "1148": 302618112.0,
+ "1149": 302618112.0,
+ "1150": 302618112.0,
+ "1151": 302618112.0,
+ "1152": 302618112.0,
+ "1153": 302618112.0,
+ "1154": 302618112.0,
+ "1155": 302618112.0,
+ "1156": 302618112.0,
+ "1157": 302618112.0,
+ "1158": 302618112.0,
+ "1159": 302618112.0,
+ "1160": 302618112.0,
+ "1161": 302618112.0,
+ "1162": 302618112.0,
+ "1163": 302618112.0,
+ "1164": 302618112.0,
+ "1165": 302618112.0,
+ "1166": 302618112.0,
+ "1167": 302618112.0,
+ "1168": 302618112.0,
+ "1169": 302618112.0,
+ "1170": 302618112.0,
+ "1171": 302618112.0,
+ "1172": 302618112.0,
+ "1173": 302618112.0,
+ "1174": 302618112.0,
+ "1175": 302618112.0,
+ "1176": 302618112.0,
+ "1177": 302618112.0,
+ "1178": 302618112.0,
+ "1179": 302618112.0,
+ "1180": 302618112.0,
+ "1181": 302618112.0,
+ "1182": 302618112.0,
+ "1183": 302618112.0,
+ "1184": 302618112.0,
+ "1185": 302618112.0,
+ "1186": 302618112.0,
+ "1187": 302618112.0,
+ "1188": 302618112.0,
+ "1189": 302618112.0,
+ "1190": 302618112.0,
+ "1191": 302618112.0,
+ "1192": 302618112.0,
+ "1193": 302618112.0,
+ "1194": 302618112.0,
+ "1195": 302618112.0,
+ "1196": 302618112.0,
+ "1197": 302618112.0,
+ "1198": 302618112.0,
+ "1199": 302618112.0,
+ "1200": 302618112.0,
+ "1201": 302618112.0,
+ "1202": 302618112.0,
+ "1203": 302618112.0,
+ "1204": 302618112.0,
+ "1205": 302618112.0,
+ "1206": 302618112.0,
+ "1207": 302618112.0,
+ "1208": 302618112.0,
+ "1209": 302618112.0,
+ "1210": 302618112.0,
+ "1211": 302618112.0,
+ "1212": 302618112.0,
+ "1213": 302618112.0,
+ "1214": 302618112.0,
+ "1215": 302618112.0,
+ "1216": 302618112.0,
+ "1217": 302618112.0,
+ "1218": 302618112.0,
+ "1219": 302618112.0,
+ "1220": 302618112.0,
+ "1221": 302618112.0,
+ "1222": 302618112.0,
+ "1223": 302618112.0,
+ "1224": 302618112.0,
+ "1225": 302618112.0,
+ "1226": 302618112.0,
+ "1227": 302618112.0,
+ "1228": 302618112.0,
+ "1229": 302618112.0,
+ "1230": 302618112.0,
+ "1231": 302618112.0,
+ "1232": 302618112.0,
+ "1233": 302618112.0,
+ "1234": 302618112.0,
+ "1235": 302618112.0,
+ "1236": 302618112.0,
+ "1237": 302618112.0,
+ "1238": 302618112.0,
+ "1239": 302618112.0,
+ "1240": 302618112.0,
+ "1241": 302618112.0,
+ "1242": 302618112.0,
+ "1243": 302618112.0,
+ "1244": 302618112.0,
+ "1245": 302618112.0,
+ "1246": 302618112.0,
+ "1247": 302618112.0,
+ "1248": 302618112.0,
+ "1249": 302618112.0,
+ "1250": 302618112.0,
+ "1251": 302618112.0,
+ "1252": 302618112.0,
+ "1253": 302618112.0,
+ "1254": 302618112.0,
+ "1255": 302618112.0,
+ "1256": 302618112.0,
+ "1257": 302618112.0,
+ "1258": 302618112.0,
+ "1259": 302618112.0,
+ "1260": 302618112.0,
+ "1261": 302618112.0,
+ "1262": 302618112.0,
+ "1263": 302618112.0,
+ "1264": 302618112.0,
+ "1265": 302618112.0,
+ "1266": 302618112.0,
+ "1267": 302618112.0,
+ "1268": 302618112.0,
+ "1269": 302618112.0,
+ "1270": 302618112.0,
+ "1271": 302618112.0,
+ "1272": 302618112.0,
+ "1273": 302618112.0,
+ "1274": 302618112.0,
+ "1275": 302618112.0,
+ "1276": 302618112.0,
+ "1277": 302618112.0,
+ "1278": 302618112.0,
+ "1279": 302618112.0,
+ "1280": 302618112.0,
+ "1281": 302618112.0,
+ "1282": 302618112.0,
+ "1283": 302618112.0,
+ "1284": 302618112.0,
+ "1285": 302618112.0,
+ "1286": 302618112.0,
+ "1287": 302618112.0,
+ "1288": 302618112.0,
+ "1289": 302618112.0,
+ "1290": 302618112.0,
+ "1291": 302618112.0,
+ "1292": 302618112.0,
+ "1293": 302618112.0,
+ "1294": 302618112.0,
+ "1295": 302618112.0,
+ "1296": 302618112.0,
+ "1297": 302618112.0,
+ "1298": 302618112.0,
+ "1299": 302618112.0,
+ "1300": 302618112.0,
+ "1301": 302618112.0,
+ "1302": 302618112.0,
+ "1303": 302618112.0,
+ "1304": 302618112.0,
+ "1305": 302618112.0,
+ "1306": 302618112.0,
+ "1307": 302618112.0,
+ "1308": 302618112.0,
+ "1309": 302618112.0,
+ "1310": 302618112.0,
+ "1311": 302618112.0,
+ "1312": 302618112.0,
+ "1313": 302618112.0,
+ "1314": 302618112.0,
+ "1315": 302618112.0,
+ "1316": 302618112.0,
+ "1317": 302618112.0,
+ "1318": 302618112.0,
+ "1319": 302618112.0,
+ "1320": 302618112.0,
+ "1321": 302618112.0,
+ "1322": 302618112.0,
+ "1323": 302618112.0,
+ "1324": 302618112.0,
+ "1325": 302618112.0,
+ "1326": 302618112.0,
+ "1327": 302618112.0,
+ "1328": 302618112.0,
+ "1329": 302618112.0,
+ "1330": 302618112.0,
+ "1331": 302618112.0,
+ "1332": 302618112.0,
+ "1333": 302618112.0,
+ "1334": 302618112.0,
+ "1335": 302618112.0,
+ "1336": 302618112.0,
+ "1337": 302618112.0,
+ "1338": 302618112.0,
+ "1339": 302618112.0,
+ "1340": 302618112.0,
+ "1341": 302618112.0,
+ "1342": 302618112.0,
+ "1343": 302618112.0,
+ "1344": 302618112.0,
+ "1345": 302618112.0,
+ "1346": 302618112.0,
+ "1347": 302618112.0,
+ "1348": 302618112.0,
+ "1349": 302618112.0,
+ "1350": 302618112.0,
+ "1351": 302618112.0,
+ "1352": 302618112.0,
+ "1353": 302618112.0,
+ "1354": 302618112.0,
+ "1355": 302618112.0,
+ "1356": 302618112.0,
+ "1357": 302618112.0,
+ "1358": 302618112.0,
+ "1359": 302618112.0,
+ "1360": 302618112.0,
+ "1361": 302618112.0,
+ "1362": 302618112.0,
+ "1363": 302618112.0,
+ "1364": 302618112.0,
+ "1365": 302618112.0,
+ "1366": 302618112.0,
+ "1367": 302618112.0,
+ "1368": 302618112.0,
+ "1369": 302618112.0,
+ "1370": 302618112.0,
+ "1371": 302618112.0,
+ "1372": 302618112.0,
+ "1373": 302618112.0,
+ "1374": 302618112.0,
+ "1375": 302618112.0,
+ "1376": 302618112.0,
+ "1377": 302618112.0,
+ "1378": 302618112.0,
+ "1379": 302618112.0,
+ "1380": 302618112.0,
+ "1381": 302618112.0,
+ "1382": 302618112.0,
+ "1383": 302618112.0,
+ "1384": 302618112.0,
+ "1385": 302618112.0,
+ "1386": 302618112.0,
+ "1387": 302618112.0,
+ "1388": 302618112.0,
+ "1389": 302618112.0,
+ "1390": 302618112.0,
+ "1391": 302618112.0,
+ "1392": 302618112.0,
+ "1393": 302618112.0,
+ "1394": 302618112.0,
+ "1395": 302618112.0,
+ "1396": 302618112.0,
+ "1397": 302618112.0,
+ "1398": 302618112.0,
+ "1399": 302618112.0,
+ "1400": 302618112.0,
+ "1401": 302618112.0,
+ "1402": 302618112.0,
+ "1403": 302618112.0,
+ "1404": 302618112.0,
+ "1405": 302618112.0,
+ "1406": 302618112.0,
+ "1407": 302618112.0,
+ "1408": 302618112.0,
+ "1409": 302618112.0,
+ "1410": 302618112.0,
+ "1411": 302618112.0,
+ "1412": 302618112.0,
+ "1413": 302618112.0,
+ "1414": 302618112.0,
+ "1415": 302618112.0,
+ "1416": 302618112.0,
+ "1417": 302618112.0,
+ "1418": 302618112.0,
+ "1419": 302618112.0,
+ "1420": 302618112.0,
+ "1421": 302618112.0,
+ "1422": 302618112.0,
+ "1423": 302618112.0,
+ "1424": 302618112.0,
+ "1425": 302618112.0,
+ "1426": 302618112.0,
+ "1427": 302618112.0,
+ "1428": 302618112.0,
+ "1429": 302618112.0,
+ "1430": 302618112.0,
+ "1431": 302618112.0,
+ "1432": 302618112.0,
+ "1433": 302618112.0,
+ "1434": 302618112.0,
+ "1435": 302618112.0,
+ "1436": 302618112.0,
+ "1437": 302618112.0,
+ "1438": 302618112.0,
+ "1439": 302618112.0,
+ "1440": 302618112.0,
+ "1441": 302618112.0,
+ "1442": 302618112.0,
+ "1443": 302618112.0,
+ "1444": 302618112.0,
+ "1445": 302618112.0,
+ "1446": 302618112.0,
+ "1447": 302618112.0,
+ "1448": 302618112.0,
+ "1449": 302618112.0,
+ "1450": 302618112.0,
+ "1451": 302618112.0,
+ "1452": 302618112.0,
+ "1453": 302618112.0,
+ "1454": 302618112.0,
+ "1455": 302618112.0,
+ "1456": 302618112.0,
+ "1457": 302618112.0,
+ "1458": 302618112.0,
+ "1459": 302618112.0,
+ "1460": 302618112.0,
+ "1461": 302618112.0,
+ "1462": 302618112.0,
+ "1463": 302618112.0,
+ "1464": 302618112.0,
+ "1465": 302618112.0,
+ "1466": 302618112.0,
+ "1467": 302618112.0,
+ "1468": 302618112.0,
+ "1469": 302618112.0,
+ "1470": 302618112.0,
+ "1471": 302618112.0,
+ "1472": 302618112.0,
+ "1473": 302618112.0,
+ "1474": 302618112.0,
+ "1475": 302618112.0,
+ "1476": 302618112.0,
+ "1477": 302618112.0,
+ "1478": 302618112.0,
+ "1479": 302618112.0,
+ "1480": 302618112.0,
+ "1481": 302618112.0,
+ "1482": 302618112.0,
+ "1483": 302618112.0,
+ "1484": 302618112.0,
+ "1485": 302618112.0,
+ "1486": 302618112.0,
+ "1487": 302618112.0,
+ "1488": 302618112.0,
+ "1489": 302618112.0,
+ "1490": 302618112.0,
+ "1491": 302618112.0,
+ "1492": 302618112.0,
+ "1493": 302618112.0,
+ "1494": 302618112.0,
+ "1495": 302618112.0,
+ "1496": 302618112.0,
+ "1497": 302618112.0,
+ "1498": 302618112.0,
+ "1499": 302618112.0,
+ "1500": 302618112.0,
+ "1501": 302618112.0,
+ "1502": 302618112.0,
+ "1503": 302618112.0,
+ "1504": 302618112.0,
+ "1505": 302618112.0,
+ "1506": 302618112.0,
+ "1507": 302618112.0,
+ "1508": 302618112.0,
+ "1509": 302618112.0,
+ "1510": 302618112.0,
+ "1511": 302618112.0,
+ "1512": 302618112.0,
+ "1513": 302618112.0,
+ "1514": 302618112.0,
+ "1515": 302618112.0,
+ "1516": 302618112.0,
+ "1517": 302618112.0,
+ "1518": 302618112.0,
+ "1519": 302618112.0,
+ "1520": 302618112.0,
+ "1521": 302618112.0,
+ "1522": 302618112.0,
+ "1523": 302618112.0,
+ "1524": 302618112.0,
+ "1525": 302618112.0,
+ "1526": 302618112.0,
+ "1527": 302618112.0,
+ "1528": 302618112.0,
+ "1529": 302618112.0,
+ "1530": 302618112.0,
+ "1531": 302618112.0,
+ "1532": 302618112.0,
+ "1533": 302618112.0,
+ "1534": 302618112.0,
+ "1535": 302618112.0,
+ "1536": 302618112.0,
+ "1537": 302618112.0,
+ "1538": 302618112.0,
+ "1539": 302618112.0,
+ "1540": 302618112.0,
+ "1541": 302618112.0,
+ "1542": 302618112.0,
+ "1543": 302618112.0,
+ "1544": 302618112.0,
+ "1545": 302618112.0,
+ "1546": 302618112.0,
+ "1547": 302618112.0,
+ "1548": 302618112.0,
+ "1549": 302618112.0,
+ "1550": 302618112.0,
+ "1551": 302618112.0,
+ "1552": 302618112.0,
+ "1553": 302618112.0,
+ "1554": 302618112.0,
+ "1555": 302618112.0,
+ "1556": 302618112.0,
+ "1557": 302618112.0,
+ "1558": 302618112.0,
+ "1559": 302618112.0,
+ "1560": 302618112.0,
+ "1561": 302618112.0,
+ "1562": 302618112.0,
+ "1563": 302618112.0,
+ "1564": 302618112.0,
+ "1565": 302618112.0,
+ "1566": 302618112.0,
+ "1567": 302618112.0,
+ "1568": 302618112.0,
+ "1569": 302618112.0,
+ "1570": 302618112.0,
+ "1571": 302618112.0,
+ "1572": 302618112.0,
+ "1573": 302618112.0,
+ "1574": 302618112.0,
+ "1575": 302618112.0,
+ "1576": 302618112.0,
+ "1577": 302618112.0,
+ "1578": 302618112.0,
+ "1579": 302618112.0,
+ "1580": 302618112.0,
+ "1581": 302618112.0,
+ "1582": 302618112.0,
+ "1583": 302618112.0,
+ "1584": 302618112.0,
+ "1585": 302618112.0,
+ "1586": 302618112.0,
+ "1587": 302618112.0,
+ "1588": 302618112.0,
+ "1589": 302618112.0,
+ "1590": 302618112.0,
+ "1591": 302618112.0,
+ "1592": 302618112.0,
+ "1593": 302618112.0,
+ "1594": 302618112.0,
+ "1595": 302618112.0,
+ "1596": 302618112.0,
+ "1597": 302618112.0,
+ "1598": 302618112.0,
+ "1599": 302618112.0,
+ "1600": 302618112.0,
+ "1601": 302618112.0,
+ "1602": 302618112.0,
+ "1603": 302618112.0,
+ "1604": 302618112.0,
+ "1605": 302618112.0,
+ "1606": 302618112.0,
+ "1607": 302618112.0,
+ "1608": 302618112.0,
+ "1609": 302618112.0,
+ "1610": 302618112.0,
+ "1611": 302618112.0,
+ "1612": 302618112.0,
+ "1613": 302618112.0,
+ "1614": 302618112.0,
+ "1615": 302618112.0,
+ "1616": 302618112.0,
+ "1617": 302618112.0,
+ "1618": 302618112.0,
+ "1619": 302618112.0,
+ "1620": 302618112.0,
+ "1621": 302618112.0,
+ "1622": 302618112.0,
+ "1623": 302618112.0,
+ "1624": 302618112.0,
+ "1625": 302618112.0,
+ "1626": 302618112.0,
+ "1627": 302618112.0,
+ "1628": 302618112.0,
+ "1629": 302618112.0,
+ "1630": 302618112.0,
+ "1631": 302618112.0,
+ "1632": 302618112.0,
+ "1633": 302618112.0,
+ "1634": 302618112.0,
+ "1635": 302618112.0,
+ "1636": 302618112.0,
+ "1637": 302618112.0,
+ "1638": 302618112.0,
+ "1639": 302618112.0,
+ "1640": 302618112.0,
+ "1641": 302618112.0,
+ "1642": 302618112.0,
+ "1643": 302618112.0,
+ "1644": 302618112.0,
+ "1645": 302618112.0,
+ "1646": 302618112.0,
+ "1647": 302618112.0,
+ "1648": 302618112.0,
+ "1649": 302618112.0,
+ "1650": 302618112.0,
+ "1651": 302618112.0,
+ "1652": 302618112.0,
+ "1653": 302618112.0,
+ "1654": 302618112.0,
+ "1655": 302618112.0,
+ "1656": 302618112.0,
+ "1657": 302618112.0,
+ "1658": 302618112.0,
+ "1659": 302618112.0,
+ "1660": 302618112.0,
+ "1661": 302618112.0,
+ "1662": 302618112.0,
+ "1663": 302618112.0,
+ "1664": 302618112.0,
+ "1665": 302618112.0,
+ "1666": 302618112.0,
+ "1667": 302618112.0,
+ "1668": 302618112.0,
+ "1669": 302618112.0,
+ "1670": 302618112.0,
+ "1671": 302618112.0,
+ "1672": 302618112.0,
+ "1673": 302618112.0,
+ "1674": 302618112.0,
+ "1675": 302618112.0,
+ "1676": 302618112.0,
+ "1677": 302618112.0,
+ "1678": 302618112.0,
+ "1679": 302618112.0,
+ "1680": 302618112.0,
+ "1681": 302618112.0,
+ "1682": 302618112.0,
+ "1683": 302618112.0,
+ "1684": 302618112.0,
+ "1685": 302618112.0,
+ "1686": 302618112.0,
+ "1687": 302618112.0,
+ "1688": 302618112.0,
+ "1689": 302618112.0,
+ "1690": 302618112.0,
+ "1691": 302618112.0,
+ "1692": 302618112.0,
+ "1693": 302618112.0,
+ "1694": 302618112.0,
+ "1695": 302618112.0,
+ "1696": 302618112.0,
+ "1697": 302618112.0,
+ "1698": 302618112.0,
+ "1699": 302618112.0,
+ "1700": 302618112.0,
+ "1701": 302618112.0,
+ "1702": 302618112.0,
+ "1703": 302618112.0,
+ "1704": 302618112.0,
+ "1705": 302618112.0,
+ "1706": 302618112.0,
+ "1707": 302618112.0,
+ "1708": 302618112.0,
+ "1709": 302618112.0,
+ "1710": 302618112.0,
+ "1711": 302618112.0,
+ "1712": 302618112.0,
+ "1713": 302618112.0,
+ "1714": 302618112.0,
+ "1715": 302618112.0,
+ "1716": 302618112.0,
+ "1717": 302618112.0,
+ "1718": 302618112.0,
+ "1719": 302618112.0,
+ "1720": 302618112.0,
+ "1721": 302618112.0,
+ "1722": 302618112.0,
+ "1723": 302618112.0,
+ "1724": 302618112.0,
+ "1725": 302618112.0,
+ "1726": 302618112.0,
+ "1727": 302618112.0,
+ "1728": 302618112.0,
+ "1729": 302618112.0,
+ "1730": 302618112.0,
+ "1731": 302618112.0,
+ "1732": 302618112.0,
+ "1733": 302618112.0,
+ "1734": 302618112.0,
+ "1735": 302618112.0,
+ "1736": 302618112.0,
+ "1737": 302618112.0,
+ "1738": 302618112.0,
+ "1739": 302618112.0,
+ "1740": 302618112.0,
+ "1741": 302618112.0,
+ "1742": 302618112.0,
+ "1743": 302618112.0,
+ "1744": 302618112.0,
+ "1745": 302618112.0,
+ "1746": 302618112.0,
+ "1747": 302618112.0,
+ "1748": 302618112.0,
+ "1749": 302618112.0,
+ "1750": 302618112.0,
+ "1751": 302618112.0,
+ "1752": 302618112.0,
+ "1753": 302618112.0,
+ "1754": 302618112.0,
+ "1755": 302618112.0,
+ "1756": 302618112.0,
+ "1757": 302618112.0,
+ "1758": 302618112.0,
+ "1759": 302618112.0,
+ "1760": 302618112.0,
+ "1761": 302618112.0,
+ "1762": 302618112.0,
+ "1763": 302618112.0,
+ "1764": 302618112.0,
+ "1765": 302618112.0,
+ "1766": 302618112.0,
+ "1767": 302618112.0,
+ "1768": 302618112.0,
+ "1769": 302618112.0,
+ "1770": 302618112.0,
+ "1771": 302618112.0,
+ "1772": 302618112.0,
+ "1773": 302618112.0,
+ "1774": 302618112.0,
+ "1775": 302618112.0,
+ "1776": 302618112.0,
+ "1777": 302618112.0,
+ "1778": 302618112.0,
+ "1779": 302618112.0,
+ "1780": 302618112.0,
+ "1781": 302618112.0,
+ "1782": 302618112.0,
+ "1783": 302618112.0,
+ "1784": 302618112.0,
+ "1785": 302618112.0,
+ "1786": 302618112.0,
+ "1787": 302618112.0,
+ "1788": 302618112.0,
+ "1789": 302618112.0,
+ "1790": 302618112.0,
+ "1791": 302618112.0,
+ "1792": 302618112.0,
+ "1793": 302618112.0,
+ "1794": 302618112.0,
+ "1795": 302618112.0,
+ "1796": 302618112.0,
+ "1797": 302618112.0,
+ "1798": 302618112.0,
+ "1799": 302618112.0,
+ "1800": 302618112.0,
+ "1801": 302618112.0,
+ "1802": 302618112.0,
+ "1803": 302618112.0,
+ "1804": 302618112.0,
+ "1805": 302618112.0,
+ "1806": 302618112.0,
+ "1807": 302618112.0,
+ "1808": 302618112.0,
+ "1809": 302618112.0,
+ "1810": 302618112.0,
+ "1811": 302618112.0,
+ "1812": 302618112.0,
+ "1813": 302618112.0,
+ "1814": 302618112.0,
+ "1815": 302618112.0,
+ "1816": 302618112.0,
+ "1817": 302618112.0,
+ "1818": 302618112.0,
+ "1819": 302618112.0,
+ "1820": 302618112.0,
+ "1821": 302618112.0,
+ "1822": 302618112.0,
+ "1823": 302618112.0,
+ "1824": 302618112.0,
+ "1825": 302618112.0,
+ "1826": 302618112.0,
+ "1827": 302618112.0,
+ "1828": 302618112.0,
+ "1829": 302618112.0,
+ "1830": 302618112.0,
+ "1831": 302618112.0,
+ "1832": 302618112.0,
+ "1833": 302618112.0,
+ "1834": 302618112.0,
+ "1835": 302618112.0,
+ "1836": 302618112.0,
+ "1837": 302618112.0,
+ "1838": 302618112.0,
+ "1839": 302618112.0,
+ "1840": 302618112.0,
+ "1841": 302618112.0,
+ "1842": 302618112.0,
+ "1843": 302618112.0,
+ "1844": 302618112.0,
+ "1845": 302618112.0,
+ "1846": 302618112.0,
+ "1847": 302618112.0,
+ "1848": 302618112.0,
+ "1849": 302618112.0,
+ "1850": 302618112.0,
+ "1851": 302618112.0,
+ "1852": 302618112.0,
+ "1853": 302618112.0,
+ "1854": 302618112.0,
+ "1855": 302618112.0,
+ "1856": 302618112.0,
+ "1857": 302618112.0,
+ "1858": 302618112.0,
+ "1859": 302618112.0,
+ "1860": 302618112.0,
+ "1861": 302618112.0,
+ "1862": 302618112.0,
+ "1863": 302618112.0,
+ "1864": 302618112.0,
+ "1865": 302618112.0,
+ "1866": 302618112.0,
+ "1867": 302618112.0,
+ "1868": 302618112.0,
+ "1869": 302618112.0,
+ "1870": 302618112.0,
+ "1871": 302618112.0,
+ "1872": 302618112.0,
+ "1873": 302618112.0,
+ "1874": 302618112.0,
+ "1875": 302618112.0,
+ "1876": 302618112.0,
+ "1877": 302618112.0,
+ "1878": 302618112.0,
+ "1879": 302618112.0,
+ "1880": 302618112.0,
+ "1881": 302618112.0,
+ "1882": 302618112.0,
+ "1883": 302618112.0,
+ "1884": 302618112.0,
+ "1885": 302618112.0,
+ "1886": 302618112.0,
+ "1887": 302618112.0,
+ "1888": 302618112.0,
+ "1889": 302618112.0,
+ "1890": 302618112.0,
+ "1891": 302618112.0,
+ "1892": 302618112.0,
+ "1893": 302618112.0,
+ "1894": 302618112.0,
+ "1895": 302618112.0,
+ "1896": 302618112.0,
+ "1897": 302618112.0,
+ "1898": 302618112.0,
+ "1899": 302618112.0,
+ "1900": 302618112.0,
+ "1901": 302618112.0,
+ "1902": 302618112.0,
+ "1903": 302618112.0,
+ "1904": 302618112.0,
+ "1905": 302618112.0,
+ "1906": 302618112.0,
+ "1907": 302618112.0,
+ "1908": 302618112.0,
+ "1909": 302618112.0,
+ "1910": 302618112.0,
+ "1911": 302618112.0,
+ "1912": 302618112.0,
+ "1913": 302618112.0,
+ "1914": 302618112.0,
+ "1915": 302618112.0,
+ "1916": 302618112.0,
+ "1917": 302618112.0,
+ "1918": 302618112.0,
+ "1919": 302618112.0,
+ "1920": 302618112.0,
+ "1921": 302618112.0,
+ "1922": 302618112.0,
+ "1923": 302618112.0,
+ "1924": 302618112.0,
+ "1925": 302618112.0,
+ "1926": 302618112.0,
+ "1927": 302618112.0,
+ "1928": 302618112.0,
+ "1929": 302618112.0,
+ "1930": 302618112.0,
+ "1931": 302618112.0,
+ "1932": 302618112.0,
+ "1933": 302618112.0,
+ "1934": 302618112.0,
+ "1935": 302618112.0,
+ "1936": 302618112.0,
+ "1937": 302618112.0,
+ "1938": 302618112.0,
+ "1939": 302618112.0,
+ "1940": 302618112.0,
+ "1941": 302618112.0,
+ "1942": 302618112.0,
+ "1943": 302618112.0,
+ "1944": 302618112.0,
+ "1945": 302618112.0,
+ "1946": 302618112.0,
+ "1947": 302618112.0,
+ "1948": 302618112.0,
+ "1949": 302618112.0,
+ "1950": 302618112.0,
+ "1951": 302618112.0,
+ "1952": 302618112.0,
+ "1953": 302618112.0,
+ "1954": 302618112.0,
+ "1955": 302618112.0,
+ "1956": 302618112.0,
+ "1957": 302618112.0,
+ "1958": 302618112.0,
+ "1959": 302618112.0,
+ "1960": 302618112.0,
+ "1961": 302618112.0,
+ "1962": 302618112.0,
+ "1963": 302618112.0,
+ "1964": 302618112.0,
+ "1965": 302618112.0,
+ "1966": 302618112.0,
+ "1967": 302618112.0,
+ "1968": 302618112.0,
+ "1969": 302618112.0,
+ "1970": 302618112.0,
+ "1971": 302618112.0,
+ "1972": 302618112.0,
+ "1973": 302618112.0,
+ "1974": 302618112.0,
+ "1975": 302618112.0,
+ "1976": 302618112.0,
+ "1977": 302618112.0,
+ "1978": 302618112.0,
+ "1979": 302618112.0,
+ "1980": 302618112.0,
+ "1981": 302618112.0,
+ "1982": 302618112.0,
+ "1983": 302618112.0,
+ "1984": 302618112.0,
+ "1985": 302618112.0,
+ "1986": 302618112.0,
+ "1987": 302618112.0,
+ "1988": 302618112.0,
+ "1989": 302618112.0,
+ "1990": 302618112.0,
+ "1991": 302618112.0,
+ "1992": 302618112.0,
+ "1993": 302618112.0,
+ "1994": 302618112.0,
+ "1995": 302618112.0,
+ "1996": 302618112.0,
+ "1997": 302618112.0,
+ "1998": 302618112.0,
+ "1999": 302618112.0,
+ "2000": 302618112.0
+ }
+ },
+ "mem-max-allocated-bytes": {
+ "start_step": 1,
+ "end_step": 2000,
+ "step_interval": 1,
+ "values": {
+ "1": 362060288.0,
+ "2": 428612096.0,
+ "3": 428612096.0,
+ "4": 428612096.0,
+ "5": 428612096.0,
+ "6": 428612096.0,
+ "7": 428612096.0,
+ "8": 428612096.0,
+ "9": 428612096.0,
+ "10": 428612096.0,
+ "11": 428612096.0,
+ "12": 428612096.0,
+ "13": 428612096.0,
+ "14": 428612096.0,
+ "15": 428612096.0,
+ "16": 428612096.0,
+ "17": 428612096.0,
+ "18": 428612096.0,
+ "19": 428612096.0,
+ "20": 428612096.0,
+ "21": 428612096.0,
+ "22": 428612096.0,
+ "23": 428612096.0,
+ "24": 428612096.0,
+ "25": 428612096.0,
+ "26": 428612096.0,
+ "27": 428612096.0,
+ "28": 428612096.0,
+ "29": 428612096.0,
+ "30": 428612096.0,
+ "31": 428612096.0,
+ "32": 428612096.0,
+ "33": 428612096.0,
+ "34": 428612096.0,
+ "35": 428612096.0,
+ "36": 428612096.0,
+ "37": 428612096.0,
+ "38": 428612096.0,
+ "39": 428612096.0,
+ "40": 428612096.0,
+ "41": 428612096.0,
+ "42": 428612096.0,
+ "43": 428612096.0,
+ "44": 428612096.0,
+ "45": 428612096.0,
+ "46": 428612096.0,
+ "47": 428612096.0,
+ "48": 428612096.0,
+ "49": 428612096.0,
+ "50": 428612096.0,
+ "51": 428612096.0,
+ "52": 428612096.0,
+ "53": 428612096.0,
+ "54": 428612096.0,
+ "55": 428612096.0,
+ "56": 428612096.0,
+ "57": 428612096.0,
+ "58": 428612096.0,
+ "59": 428612096.0,
+ "60": 428612096.0,
+ "61": 428612096.0,
+ "62": 428612096.0,
+ "63": 428612096.0,
+ "64": 428612096.0,
+ "65": 428612096.0,
+ "66": 428612096.0,
+ "67": 428612096.0,
+ "68": 428612096.0,
+ "69": 428612096.0,
+ "70": 428612096.0,
+ "71": 428612096.0,
+ "72": 428612096.0,
+ "73": 428612096.0,
+ "74": 428612096.0,
+ "75": 428612096.0,
+ "76": 428612096.0,
+ "77": 428612096.0,
+ "78": 428612096.0,
+ "79": 428612096.0,
+ "80": 428612096.0,
+ "81": 428612096.0,
+ "82": 428612096.0,
+ "83": 428612096.0,
+ "84": 428612096.0,
+ "85": 428612096.0,
+ "86": 428612096.0,
+ "87": 428612096.0,
+ "88": 428612096.0,
+ "89": 428612096.0,
+ "90": 428612096.0,
+ "91": 428612096.0,
+ "92": 428612096.0,
+ "93": 428612096.0,
+ "94": 428612096.0,
+ "95": 428612096.0,
+ "96": 428612096.0,
+ "97": 428612096.0,
+ "98": 428612096.0,
+ "99": 428612096.0,
+ "100": 428612096.0,
+ "101": 428612096.0,
+ "102": 428612096.0,
+ "103": 428612096.0,
+ "104": 428612096.0,
+ "105": 428612096.0,
+ "106": 428612096.0,
+ "107": 428612096.0,
+ "108": 428612096.0,
+ "109": 428612096.0,
+ "110": 428612096.0,
+ "111": 428612096.0,
+ "112": 428612096.0,
+ "113": 428612096.0,
+ "114": 428612096.0,
+ "115": 428612096.0,
+ "116": 428612096.0,
+ "117": 428612096.0,
+ "118": 428612096.0,
+ "119": 428612096.0,
+ "120": 428612096.0,
+ "121": 428612096.0,
+ "122": 428612096.0,
+ "123": 428612096.0,
+ "124": 428612096.0,
+ "125": 428612096.0,
+ "126": 428612096.0,
+ "127": 428612096.0,
+ "128": 428612096.0,
+ "129": 428612096.0,
+ "130": 428612096.0,
+ "131": 428612096.0,
+ "132": 428612096.0,
+ "133": 428612096.0,
+ "134": 428612096.0,
+ "135": 428612096.0,
+ "136": 428612096.0,
+ "137": 428612096.0,
+ "138": 428612096.0,
+ "139": 428612096.0,
+ "140": 428612096.0,
+ "141": 428612096.0,
+ "142": 428612096.0,
+ "143": 428612096.0,
+ "144": 428612096.0,
+ "145": 428612096.0,
+ "146": 428612096.0,
+ "147": 428612096.0,
+ "148": 428612096.0,
+ "149": 428612096.0,
+ "150": 428612096.0,
+ "151": 428612096.0,
+ "152": 428612096.0,
+ "153": 428612096.0,
+ "154": 428612096.0,
+ "155": 428612096.0,
+ "156": 428612096.0,
+ "157": 428612096.0,
+ "158": 428612096.0,
+ "159": 428612096.0,
+ "160": 428612096.0,
+ "161": 428612096.0,
+ "162": 428612096.0,
+ "163": 428612096.0,
+ "164": 428612096.0,
+ "165": 428612096.0,
+ "166": 428612096.0,
+ "167": 428612096.0,
+ "168": 428612096.0,
+ "169": 428612096.0,
+ "170": 428612096.0,
+ "171": 428612096.0,
+ "172": 428612096.0,
+ "173": 428612096.0,
+ "174": 428612096.0,
+ "175": 428612096.0,
+ "176": 428612096.0,
+ "177": 428612096.0,
+ "178": 428612096.0,
+ "179": 428612096.0,
+ "180": 428612096.0,
+ "181": 428612096.0,
+ "182": 428612096.0,
+ "183": 428612096.0,
+ "184": 428612096.0,
+ "185": 428612096.0,
+ "186": 428612096.0,
+ "187": 428612096.0,
+ "188": 428612096.0,
+ "189": 428612096.0,
+ "190": 428612096.0,
+ "191": 428612096.0,
+ "192": 428612096.0,
+ "193": 428612096.0,
+ "194": 428612096.0,
+ "195": 428612096.0,
+ "196": 428612096.0,
+ "197": 428612096.0,
+ "198": 428612096.0,
+ "199": 428612096.0,
+ "200": 428612096.0,
+ "201": 428612096.0,
+ "202": 428612096.0,
+ "203": 428612096.0,
+ "204": 428612096.0,
+ "205": 428612096.0,
+ "206": 428612096.0,
+ "207": 428612096.0,
+ "208": 428612096.0,
+ "209": 428612096.0,
+ "210": 428612096.0,
+ "211": 428612096.0,
+ "212": 428612096.0,
+ "213": 428612096.0,
+ "214": 428612096.0,
+ "215": 428612096.0,
+ "216": 428612096.0,
+ "217": 428612096.0,
+ "218": 428612096.0,
+ "219": 428612096.0,
+ "220": 428612096.0,
+ "221": 428612096.0,
+ "222": 428612096.0,
+ "223": 428612096.0,
+ "224": 428612096.0,
+ "225": 428612096.0,
+ "226": 428612096.0,
+ "227": 428612096.0,
+ "228": 428612096.0,
+ "229": 428612096.0,
+ "230": 428612096.0,
+ "231": 428612096.0,
+ "232": 428612096.0,
+ "233": 428612096.0,
+ "234": 428612096.0,
+ "235": 428612096.0,
+ "236": 428612096.0,
+ "237": 428612096.0,
+ "238": 428612096.0,
+ "239": 428612096.0,
+ "240": 428612096.0,
+ "241": 428612096.0,
+ "242": 428612096.0,
+ "243": 428612096.0,
+ "244": 428612096.0,
+ "245": 428612096.0,
+ "246": 428612096.0,
+ "247": 428612096.0,
+ "248": 428612096.0,
+ "249": 428612096.0,
+ "250": 428612096.0,
+ "251": 428612096.0,
+ "252": 428612096.0,
+ "253": 428612096.0,
+ "254": 428612096.0,
+ "255": 428612096.0,
+ "256": 428612096.0,
+ "257": 428612096.0,
+ "258": 428612096.0,
+ "259": 428612096.0,
+ "260": 428612096.0,
+ "261": 428612096.0,
+ "262": 428612096.0,
+ "263": 428612096.0,
+ "264": 428612096.0,
+ "265": 428612096.0,
+ "266": 428612096.0,
+ "267": 428612096.0,
+ "268": 428612096.0,
+ "269": 428612096.0,
+ "270": 428612096.0,
+ "271": 428612096.0,
+ "272": 428612096.0,
+ "273": 428612096.0,
+ "274": 428612096.0,
+ "275": 428612096.0,
+ "276": 428612096.0,
+ "277": 428612096.0,
+ "278": 428612096.0,
+ "279": 428612096.0,
+ "280": 428612096.0,
+ "281": 428612096.0,
+ "282": 428612096.0,
+ "283": 428612096.0,
+ "284": 428612096.0,
+ "285": 428612096.0,
+ "286": 428612096.0,
+ "287": 428612096.0,
+ "288": 428612096.0,
+ "289": 428612096.0,
+ "290": 428612096.0,
+ "291": 428612096.0,
+ "292": 428612096.0,
+ "293": 428612096.0,
+ "294": 428612096.0,
+ "295": 428612096.0,
+ "296": 428612096.0,
+ "297": 428612096.0,
+ "298": 428612096.0,
+ "299": 428612096.0,
+ "300": 428612096.0,
+ "301": 428612096.0,
+ "302": 428612096.0,
+ "303": 428612096.0,
+ "304": 428612096.0,
+ "305": 428612096.0,
+ "306": 428612096.0,
+ "307": 428612096.0,
+ "308": 428612096.0,
+ "309": 428612096.0,
+ "310": 428612096.0,
+ "311": 428612096.0,
+ "312": 428612096.0,
+ "313": 428612096.0,
+ "314": 428612096.0,
+ "315": 428612096.0,
+ "316": 428612096.0,
+ "317": 428612096.0,
+ "318": 428612096.0,
+ "319": 428612096.0,
+ "320": 428612096.0,
+ "321": 428612096.0,
+ "322": 428612096.0,
+ "323": 428612096.0,
+ "324": 428612096.0,
+ "325": 428612096.0,
+ "326": 428612096.0,
+ "327": 428612096.0,
+ "328": 428612096.0,
+ "329": 428612096.0,
+ "330": 428612096.0,
+ "331": 428612096.0,
+ "332": 428612096.0,
+ "333": 428612096.0,
+ "334": 428612096.0,
+ "335": 428612096.0,
+ "336": 428612096.0,
+ "337": 428612096.0,
+ "338": 428612096.0,
+ "339": 428612096.0,
+ "340": 428612096.0,
+ "341": 428612096.0,
+ "342": 428612096.0,
+ "343": 428612096.0,
+ "344": 428612096.0,
+ "345": 428612096.0,
+ "346": 428612096.0,
+ "347": 428612096.0,
+ "348": 428612096.0,
+ "349": 428612096.0,
+ "350": 428612096.0,
+ "351": 428612096.0,
+ "352": 428612096.0,
+ "353": 428612096.0,
+ "354": 428612096.0,
+ "355": 428612096.0,
+ "356": 428612096.0,
+ "357": 428612096.0,
+ "358": 428612096.0,
+ "359": 428612096.0,
+ "360": 428612096.0,
+ "361": 428612096.0,
+ "362": 428612096.0,
+ "363": 428612096.0,
+ "364": 428612096.0,
+ "365": 428612096.0,
+ "366": 428612096.0,
+ "367": 428612096.0,
+ "368": 428612096.0,
+ "369": 428612096.0,
+ "370": 428612096.0,
+ "371": 428612096.0,
+ "372": 428612096.0,
+ "373": 428612096.0,
+ "374": 428612096.0,
+ "375": 428612096.0,
+ "376": 428612096.0,
+ "377": 428612096.0,
+ "378": 428612096.0,
+ "379": 428612096.0,
+ "380": 428612096.0,
+ "381": 428612096.0,
+ "382": 428612096.0,
+ "383": 428612096.0,
+ "384": 428612096.0,
+ "385": 428612096.0,
+ "386": 428612096.0,
+ "387": 428612096.0,
+ "388": 428612096.0,
+ "389": 428612096.0,
+ "390": 428612096.0,
+ "391": 428612096.0,
+ "392": 428612096.0,
+ "393": 428612096.0,
+ "394": 428612096.0,
+ "395": 428612096.0,
+ "396": 428612096.0,
+ "397": 428612096.0,
+ "398": 428612096.0,
+ "399": 428612096.0,
+ "400": 428612096.0,
+ "401": 428612096.0,
+ "402": 428612096.0,
+ "403": 428612096.0,
+ "404": 428612096.0,
+ "405": 428612096.0,
+ "406": 428612096.0,
+ "407": 428612096.0,
+ "408": 428612096.0,
+ "409": 428612096.0,
+ "410": 428612096.0,
+ "411": 428612096.0,
+ "412": 428612096.0,
+ "413": 428612096.0,
+ "414": 428612096.0,
+ "415": 428612096.0,
+ "416": 428612096.0,
+ "417": 428612096.0,
+ "418": 428612096.0,
+ "419": 428612096.0,
+ "420": 428612096.0,
+ "421": 428612096.0,
+ "422": 428612096.0,
+ "423": 428612096.0,
+ "424": 428612096.0,
+ "425": 428612096.0,
+ "426": 428612096.0,
+ "427": 428612096.0,
+ "428": 428612096.0,
+ "429": 428612096.0,
+ "430": 428612096.0,
+ "431": 428612096.0,
+ "432": 428612096.0,
+ "433": 428612096.0,
+ "434": 428612096.0,
+ "435": 428612096.0,
+ "436": 428612096.0,
+ "437": 428612096.0,
+ "438": 428612096.0,
+ "439": 428612096.0,
+ "440": 428612096.0,
+ "441": 428612096.0,
+ "442": 428612096.0,
+ "443": 428612096.0,
+ "444": 428612096.0,
+ "445": 428612096.0,
+ "446": 428612096.0,
+ "447": 428612096.0,
+ "448": 428612096.0,
+ "449": 428612096.0,
+ "450": 428612096.0,
+ "451": 428612096.0,
+ "452": 428612096.0,
+ "453": 428612096.0,
+ "454": 428612096.0,
+ "455": 428612096.0,
+ "456": 428612096.0,
+ "457": 428612096.0,
+ "458": 428612096.0,
+ "459": 428612096.0,
+ "460": 428612096.0,
+ "461": 428612096.0,
+ "462": 428612096.0,
+ "463": 428612096.0,
+ "464": 428612096.0,
+ "465": 428612096.0,
+ "466": 428612096.0,
+ "467": 428612096.0,
+ "468": 428612096.0,
+ "469": 428612096.0,
+ "470": 428612096.0,
+ "471": 428612096.0,
+ "472": 428612096.0,
+ "473": 428612096.0,
+ "474": 428612096.0,
+ "475": 428612096.0,
+ "476": 428612096.0,
+ "477": 428612096.0,
+ "478": 428612096.0,
+ "479": 428612096.0,
+ "480": 428612096.0,
+ "481": 428612096.0,
+ "482": 428612096.0,
+ "483": 428612096.0,
+ "484": 428612096.0,
+ "485": 428612096.0,
+ "486": 428612096.0,
+ "487": 428612096.0,
+ "488": 428612096.0,
+ "489": 428612096.0,
+ "490": 428612096.0,
+ "491": 428612096.0,
+ "492": 428612096.0,
+ "493": 428612096.0,
+ "494": 428612096.0,
+ "495": 428612096.0,
+ "496": 428612096.0,
+ "497": 428612096.0,
+ "498": 428612096.0,
+ "499": 428612096.0,
+ "500": 428612096.0,
+ "501": 428612096.0,
+ "502": 428612096.0,
+ "503": 428612096.0,
+ "504": 428612096.0,
+ "505": 428612096.0,
+ "506": 428612096.0,
+ "507": 428612096.0,
+ "508": 428612096.0,
+ "509": 428612096.0,
+ "510": 428612096.0,
+ "511": 428612096.0,
+ "512": 428612096.0,
+ "513": 428612096.0,
+ "514": 428612096.0,
+ "515": 428612096.0,
+ "516": 428612096.0,
+ "517": 428612096.0,
+ "518": 428612096.0,
+ "519": 428612096.0,
+ "520": 428612096.0,
+ "521": 428612096.0,
+ "522": 428612096.0,
+ "523": 428612096.0,
+ "524": 428612096.0,
+ "525": 428612096.0,
+ "526": 428612096.0,
+ "527": 428612096.0,
+ "528": 428612096.0,
+ "529": 428612096.0,
+ "530": 428612096.0,
+ "531": 428612096.0,
+ "532": 428612096.0,
+ "533": 428612096.0,
+ "534": 428612096.0,
+ "535": 428612096.0,
+ "536": 428612096.0,
+ "537": 428612096.0,
+ "538": 428612096.0,
+ "539": 428612096.0,
+ "540": 428612096.0,
+ "541": 428612096.0,
+ "542": 428612096.0,
+ "543": 428612096.0,
+ "544": 428612096.0,
+ "545": 428612096.0,
+ "546": 428612096.0,
+ "547": 428612096.0,
+ "548": 428612096.0,
+ "549": 428612096.0,
+ "550": 428612096.0,
+ "551": 428612096.0,
+ "552": 428612096.0,
+ "553": 428612096.0,
+ "554": 428612096.0,
+ "555": 428612096.0,
+ "556": 428612096.0,
+ "557": 428612096.0,
+ "558": 428612096.0,
+ "559": 428612096.0,
+ "560": 428612096.0,
+ "561": 428612096.0,
+ "562": 428612096.0,
+ "563": 428612096.0,
+ "564": 428612096.0,
+ "565": 428612096.0,
+ "566": 428612096.0,
+ "567": 428612096.0,
+ "568": 428612096.0,
+ "569": 428612096.0,
+ "570": 428612096.0,
+ "571": 428612096.0,
+ "572": 428612096.0,
+ "573": 428612096.0,
+ "574": 428612096.0,
+ "575": 428612096.0,
+ "576": 428612096.0,
+ "577": 428612096.0,
+ "578": 428612096.0,
+ "579": 428612096.0,
+ "580": 428612096.0,
+ "581": 428612096.0,
+ "582": 428612096.0,
+ "583": 428612096.0,
+ "584": 428612096.0,
+ "585": 428612096.0,
+ "586": 428612096.0,
+ "587": 428612096.0,
+ "588": 428612096.0,
+ "589": 428612096.0,
+ "590": 428612096.0,
+ "591": 428612096.0,
+ "592": 428612096.0,
+ "593": 428612096.0,
+ "594": 428612096.0,
+ "595": 428612096.0,
+ "596": 428612096.0,
+ "597": 428612096.0,
+ "598": 428612096.0,
+ "599": 428612096.0,
+ "600": 428612096.0,
+ "601": 428612096.0,
+ "602": 428612096.0,
+ "603": 428612096.0,
+ "604": 428612096.0,
+ "605": 428612096.0,
+ "606": 428612096.0,
+ "607": 428612096.0,
+ "608": 428612096.0,
+ "609": 428612096.0,
+ "610": 428612096.0,
+ "611": 428612096.0,
+ "612": 428612096.0,
+ "613": 428612096.0,
+ "614": 428612096.0,
+ "615": 428612096.0,
+ "616": 428612096.0,
+ "617": 428612096.0,
+ "618": 428612096.0,
+ "619": 428612096.0,
+ "620": 428612096.0,
+ "621": 428612096.0,
+ "622": 428612096.0,
+ "623": 428612096.0,
+ "624": 428612096.0,
+ "625": 428612096.0,
+ "626": 428612096.0,
+ "627": 428612096.0,
+ "628": 428612096.0,
+ "629": 428612096.0,
+ "630": 428612096.0,
+ "631": 428612096.0,
+ "632": 428612096.0,
+ "633": 428612096.0,
+ "634": 428612096.0,
+ "635": 428612096.0,
+ "636": 428612096.0,
+ "637": 428612096.0,
+ "638": 428612096.0,
+ "639": 428612096.0,
+ "640": 428612096.0,
+ "641": 428612096.0,
+ "642": 428612096.0,
+ "643": 428612096.0,
+ "644": 428612096.0,
+ "645": 428612096.0,
+ "646": 428612096.0,
+ "647": 428612096.0,
+ "648": 428612096.0,
+ "649": 428612096.0,
+ "650": 428612096.0,
+ "651": 428612096.0,
+ "652": 428612096.0,
+ "653": 428612096.0,
+ "654": 428612096.0,
+ "655": 428612096.0,
+ "656": 428612096.0,
+ "657": 428612096.0,
+ "658": 428612096.0,
+ "659": 428612096.0,
+ "660": 428612096.0,
+ "661": 428612096.0,
+ "662": 428612096.0,
+ "663": 428612096.0,
+ "664": 428612096.0,
+ "665": 428612096.0,
+ "666": 428612096.0,
+ "667": 428612096.0,
+ "668": 428612096.0,
+ "669": 428612096.0,
+ "670": 428612096.0,
+ "671": 428612096.0,
+ "672": 428612096.0,
+ "673": 428612096.0,
+ "674": 428612096.0,
+ "675": 428612096.0,
+ "676": 428612096.0,
+ "677": 428612096.0,
+ "678": 428612096.0,
+ "679": 428612096.0,
+ "680": 428612096.0,
+ "681": 428612096.0,
+ "682": 428612096.0,
+ "683": 428612096.0,
+ "684": 428612096.0,
+ "685": 428612096.0,
+ "686": 428612096.0,
+ "687": 428612096.0,
+ "688": 428612096.0,
+ "689": 428612096.0,
+ "690": 428612096.0,
+ "691": 428612096.0,
+ "692": 428612096.0,
+ "693": 428612096.0,
+ "694": 428612096.0,
+ "695": 428612096.0,
+ "696": 428612096.0,
+ "697": 428612096.0,
+ "698": 428612096.0,
+ "699": 428612096.0,
+ "700": 428612096.0,
+ "701": 428612096.0,
+ "702": 428612096.0,
+ "703": 428612096.0,
+ "704": 428612096.0,
+ "705": 428612096.0,
+ "706": 428612096.0,
+ "707": 428612096.0,
+ "708": 428612096.0,
+ "709": 428612096.0,
+ "710": 428612096.0,
+ "711": 428612096.0,
+ "712": 428612096.0,
+ "713": 428612096.0,
+ "714": 428612096.0,
+ "715": 428612096.0,
+ "716": 428612096.0,
+ "717": 428612096.0,
+ "718": 428612096.0,
+ "719": 428612096.0,
+ "720": 428612096.0,
+ "721": 428612096.0,
+ "722": 428612096.0,
+ "723": 428612096.0,
+ "724": 428612096.0,
+ "725": 428612096.0,
+ "726": 428612096.0,
+ "727": 428612096.0,
+ "728": 428612096.0,
+ "729": 428612096.0,
+ "730": 428612096.0,
+ "731": 428612096.0,
+ "732": 428612096.0,
+ "733": 428612096.0,
+ "734": 428612096.0,
+ "735": 428612096.0,
+ "736": 428612096.0,
+ "737": 428612096.0,
+ "738": 428612096.0,
+ "739": 428612096.0,
+ "740": 428612096.0,
+ "741": 428612096.0,
+ "742": 428612096.0,
+ "743": 428612096.0,
+ "744": 428612096.0,
+ "745": 428612096.0,
+ "746": 428612096.0,
+ "747": 428612096.0,
+ "748": 428612096.0,
+ "749": 428612096.0,
+ "750": 428612096.0,
+ "751": 428612096.0,
+ "752": 428612096.0,
+ "753": 428612096.0,
+ "754": 428612096.0,
+ "755": 428612096.0,
+ "756": 428612096.0,
+ "757": 428612096.0,
+ "758": 428612096.0,
+ "759": 428612096.0,
+ "760": 428612096.0,
+ "761": 428612096.0,
+ "762": 428612096.0,
+ "763": 428612096.0,
+ "764": 428612096.0,
+ "765": 428612096.0,
+ "766": 428612096.0,
+ "767": 428612096.0,
+ "768": 428612096.0,
+ "769": 428612096.0,
+ "770": 428612096.0,
+ "771": 428612096.0,
+ "772": 428612096.0,
+ "773": 428612096.0,
+ "774": 428612096.0,
+ "775": 428612096.0,
+ "776": 428612096.0,
+ "777": 428612096.0,
+ "778": 428612096.0,
+ "779": 428612096.0,
+ "780": 428612096.0,
+ "781": 428612096.0,
+ "782": 428612096.0,
+ "783": 428612096.0,
+ "784": 428612096.0,
+ "785": 428612096.0,
+ "786": 428612096.0,
+ "787": 428612096.0,
+ "788": 428612096.0,
+ "789": 428612096.0,
+ "790": 428612096.0,
+ "791": 428612096.0,
+ "792": 428612096.0,
+ "793": 428612096.0,
+ "794": 428612096.0,
+ "795": 428612096.0,
+ "796": 428612096.0,
+ "797": 428612096.0,
+ "798": 428612096.0,
+ "799": 428612096.0,
+ "800": 428612096.0,
+ "801": 428612096.0,
+ "802": 428612096.0,
+ "803": 428612096.0,
+ "804": 428612096.0,
+ "805": 428612096.0,
+ "806": 428612096.0,
+ "807": 428612096.0,
+ "808": 428612096.0,
+ "809": 428612096.0,
+ "810": 428612096.0,
+ "811": 428612096.0,
+ "812": 428612096.0,
+ "813": 428612096.0,
+ "814": 428612096.0,
+ "815": 428612096.0,
+ "816": 428612096.0,
+ "817": 428612096.0,
+ "818": 428612096.0,
+ "819": 428612096.0,
+ "820": 428612096.0,
+ "821": 428612096.0,
+ "822": 428612096.0,
+ "823": 428612096.0,
+ "824": 428612096.0,
+ "825": 428612096.0,
+ "826": 428612096.0,
+ "827": 428612096.0,
+ "828": 428612096.0,
+ "829": 428612096.0,
+ "830": 428612096.0,
+ "831": 428612096.0,
+ "832": 428612096.0,
+ "833": 428612096.0,
+ "834": 428612096.0,
+ "835": 428612096.0,
+ "836": 428612096.0,
+ "837": 428612096.0,
+ "838": 428612096.0,
+ "839": 428612096.0,
+ "840": 428612096.0,
+ "841": 428612096.0,
+ "842": 428612096.0,
+ "843": 428612096.0,
+ "844": 428612096.0,
+ "845": 428612096.0,
+ "846": 428612096.0,
+ "847": 428612096.0,
+ "848": 428612096.0,
+ "849": 428612096.0,
+ "850": 428612096.0,
+ "851": 428612096.0,
+ "852": 428612096.0,
+ "853": 428612096.0,
+ "854": 428612096.0,
+ "855": 428612096.0,
+ "856": 428612096.0,
+ "857": 428612096.0,
+ "858": 428612096.0,
+ "859": 428612096.0,
+ "860": 428612096.0,
+ "861": 428612096.0,
+ "862": 428612096.0,
+ "863": 428612096.0,
+ "864": 428612096.0,
+ "865": 428612096.0,
+ "866": 428612096.0,
+ "867": 428612096.0,
+ "868": 428612096.0,
+ "869": 428612096.0,
+ "870": 428612096.0,
+ "871": 428612096.0,
+ "872": 428612096.0,
+ "873": 428612096.0,
+ "874": 428612096.0,
+ "875": 428612096.0,
+ "876": 428612096.0,
+ "877": 428612096.0,
+ "878": 428612096.0,
+ "879": 428612096.0,
+ "880": 428612096.0,
+ "881": 428612096.0,
+ "882": 428612096.0,
+ "883": 428612096.0,
+ "884": 428612096.0,
+ "885": 428612096.0,
+ "886": 428612096.0,
+ "887": 428612096.0,
+ "888": 428612096.0,
+ "889": 428612096.0,
+ "890": 428612096.0,
+ "891": 428612096.0,
+ "892": 428612096.0,
+ "893": 428612096.0,
+ "894": 428612096.0,
+ "895": 428612096.0,
+ "896": 428612096.0,
+ "897": 428612096.0,
+ "898": 428612096.0,
+ "899": 428612096.0,
+ "900": 428612096.0,
+ "901": 428612096.0,
+ "902": 428612096.0,
+ "903": 428612096.0,
+ "904": 428612096.0,
+ "905": 428612096.0,
+ "906": 428612096.0,
+ "907": 428612096.0,
+ "908": 428612096.0,
+ "909": 428612096.0,
+ "910": 428612096.0,
+ "911": 428612096.0,
+ "912": 428612096.0,
+ "913": 428612096.0,
+ "914": 428612096.0,
+ "915": 428612096.0,
+ "916": 428612096.0,
+ "917": 428612096.0,
+ "918": 428612096.0,
+ "919": 428612096.0,
+ "920": 428612096.0,
+ "921": 428612096.0,
+ "922": 428612096.0,
+ "923": 428612096.0,
+ "924": 428612096.0,
+ "925": 428612096.0,
+ "926": 428612096.0,
+ "927": 428612096.0,
+ "928": 428612096.0,
+ "929": 428612096.0,
+ "930": 428612096.0,
+ "931": 428612096.0,
+ "932": 428612096.0,
+ "933": 428612096.0,
+ "934": 428612096.0,
+ "935": 428612096.0,
+ "936": 428612096.0,
+ "937": 428612096.0,
+ "938": 428612096.0,
+ "939": 428612096.0,
+ "940": 428612096.0,
+ "941": 428612096.0,
+ "942": 428612096.0,
+ "943": 428612096.0,
+ "944": 428612096.0,
+ "945": 428612096.0,
+ "946": 428612096.0,
+ "947": 428612096.0,
+ "948": 428612096.0,
+ "949": 428612096.0,
+ "950": 428612096.0,
+ "951": 428612096.0,
+ "952": 428612096.0,
+ "953": 428612096.0,
+ "954": 428612096.0,
+ "955": 428612096.0,
+ "956": 428612096.0,
+ "957": 428612096.0,
+ "958": 428612096.0,
+ "959": 428612096.0,
+ "960": 428612096.0,
+ "961": 428612096.0,
+ "962": 428612096.0,
+ "963": 428612096.0,
+ "964": 428612096.0,
+ "965": 428612096.0,
+ "966": 428612096.0,
+ "967": 428612096.0,
+ "968": 428612096.0,
+ "969": 428612096.0,
+ "970": 428612096.0,
+ "971": 428612096.0,
+ "972": 428612096.0,
+ "973": 428612096.0,
+ "974": 428612096.0,
+ "975": 428612096.0,
+ "976": 428612096.0,
+ "977": 428612096.0,
+ "978": 428612096.0,
+ "979": 428612096.0,
+ "980": 428612096.0,
+ "981": 428612096.0,
+ "982": 428612096.0,
+ "983": 428612096.0,
+ "984": 428612096.0,
+ "985": 428612096.0,
+ "986": 428612096.0,
+ "987": 428612096.0,
+ "988": 428612096.0,
+ "989": 428612096.0,
+ "990": 428612096.0,
+ "991": 428612096.0,
+ "992": 428612096.0,
+ "993": 428612096.0,
+ "994": 428612096.0,
+ "995": 428612096.0,
+ "996": 428612096.0,
+ "997": 428612096.0,
+ "998": 428612096.0,
+ "999": 428612096.0,
+ "1000": 428612096.0,
+ "1001": 428612096.0,
+ "1002": 428612096.0,
+ "1003": 428612096.0,
+ "1004": 428612096.0,
+ "1005": 428612096.0,
+ "1006": 428612096.0,
+ "1007": 428612096.0,
+ "1008": 428612096.0,
+ "1009": 428612096.0,
+ "1010": 428612096.0,
+ "1011": 428612096.0,
+ "1012": 428612096.0,
+ "1013": 428612096.0,
+ "1014": 428612096.0,
+ "1015": 428612096.0,
+ "1016": 428612096.0,
+ "1017": 428612096.0,
+ "1018": 428612096.0,
+ "1019": 428612096.0,
+ "1020": 428612096.0,
+ "1021": 428612096.0,
+ "1022": 428612096.0,
+ "1023": 428612096.0,
+ "1024": 428612096.0,
+ "1025": 428612096.0,
+ "1026": 428612096.0,
+ "1027": 428612096.0,
+ "1028": 428612096.0,
+ "1029": 428612096.0,
+ "1030": 428612096.0,
+ "1031": 428612096.0,
+ "1032": 428612096.0,
+ "1033": 428612096.0,
+ "1034": 428612096.0,
+ "1035": 428612096.0,
+ "1036": 428612096.0,
+ "1037": 428612096.0,
+ "1038": 428612096.0,
+ "1039": 428612096.0,
+ "1040": 428612096.0,
+ "1041": 428612096.0,
+ "1042": 428612096.0,
+ "1043": 428612096.0,
+ "1044": 428612096.0,
+ "1045": 428612096.0,
+ "1046": 428612096.0,
+ "1047": 428612096.0,
+ "1048": 428612096.0,
+ "1049": 428612096.0,
+ "1050": 428612096.0,
+ "1051": 428612096.0,
+ "1052": 428612096.0,
+ "1053": 428612096.0,
+ "1054": 428612096.0,
+ "1055": 428612096.0,
+ "1056": 428612096.0,
+ "1057": 428612096.0,
+ "1058": 428612096.0,
+ "1059": 428612096.0,
+ "1060": 428612096.0,
+ "1061": 428612096.0,
+ "1062": 428612096.0,
+ "1063": 428612096.0,
+ "1064": 428612096.0,
+ "1065": 428612096.0,
+ "1066": 428612096.0,
+ "1067": 428612096.0,
+ "1068": 428612096.0,
+ "1069": 428612096.0,
+ "1070": 428612096.0,
+ "1071": 428612096.0,
+ "1072": 428612096.0,
+ "1073": 428612096.0,
+ "1074": 428612096.0,
+ "1075": 428612096.0,
+ "1076": 428612096.0,
+ "1077": 428612096.0,
+ "1078": 428612096.0,
+ "1079": 428612096.0,
+ "1080": 428612096.0,
+ "1081": 428612096.0,
+ "1082": 428612096.0,
+ "1083": 428612096.0,
+ "1084": 428612096.0,
+ "1085": 428612096.0,
+ "1086": 428612096.0,
+ "1087": 428612096.0,
+ "1088": 428612096.0,
+ "1089": 428612096.0,
+ "1090": 428612096.0,
+ "1091": 428612096.0,
+ "1092": 428612096.0,
+ "1093": 428612096.0,
+ "1094": 428612096.0,
+ "1095": 428612096.0,
+ "1096": 428612096.0,
+ "1097": 428612096.0,
+ "1098": 428612096.0,
+ "1099": 428612096.0,
+ "1100": 428612096.0,
+ "1101": 428612096.0,
+ "1102": 428612096.0,
+ "1103": 428612096.0,
+ "1104": 428612096.0,
+ "1105": 428612096.0,
+ "1106": 428612096.0,
+ "1107": 428612096.0,
+ "1108": 428612096.0,
+ "1109": 428612096.0,
+ "1110": 428612096.0,
+ "1111": 428612096.0,
+ "1112": 428612096.0,
+ "1113": 428612096.0,
+ "1114": 428612096.0,
+ "1115": 428612096.0,
+ "1116": 428612096.0,
+ "1117": 428612096.0,
+ "1118": 428612096.0,
+ "1119": 428612096.0,
+ "1120": 428612096.0,
+ "1121": 428612096.0,
+ "1122": 428612096.0,
+ "1123": 428612096.0,
+ "1124": 428612096.0,
+ "1125": 428612096.0,
+ "1126": 428612096.0,
+ "1127": 428612096.0,
+ "1128": 428612096.0,
+ "1129": 428612096.0,
+ "1130": 428612096.0,
+ "1131": 428612096.0,
+ "1132": 428612096.0,
+ "1133": 428612096.0,
+ "1134": 428612096.0,
+ "1135": 428612096.0,
+ "1136": 428612096.0,
+ "1137": 428612096.0,
+ "1138": 428612096.0,
+ "1139": 428612096.0,
+ "1140": 428612096.0,
+ "1141": 428612096.0,
+ "1142": 428612096.0,
+ "1143": 428612096.0,
+ "1144": 428612096.0,
+ "1145": 428612096.0,
+ "1146": 428612096.0,
+ "1147": 428612096.0,
+ "1148": 428612096.0,
+ "1149": 428612096.0,
+ "1150": 428612096.0,
+ "1151": 428612096.0,
+ "1152": 428612096.0,
+ "1153": 428612096.0,
+ "1154": 428612096.0,
+ "1155": 428612096.0,
+ "1156": 428612096.0,
+ "1157": 428612096.0,
+ "1158": 428612096.0,
+ "1159": 428612096.0,
+ "1160": 428612096.0,
+ "1161": 428612096.0,
+ "1162": 428612096.0,
+ "1163": 428612096.0,
+ "1164": 428612096.0,
+ "1165": 428612096.0,
+ "1166": 428612096.0,
+ "1167": 428612096.0,
+ "1168": 428612096.0,
+ "1169": 428612096.0,
+ "1170": 428612096.0,
+ "1171": 428612096.0,
+ "1172": 428612096.0,
+ "1173": 428612096.0,
+ "1174": 428612096.0,
+ "1175": 428612096.0,
+ "1176": 428612096.0,
+ "1177": 428612096.0,
+ "1178": 428612096.0,
+ "1179": 428612096.0,
+ "1180": 428612096.0,
+ "1181": 428612096.0,
+ "1182": 428612096.0,
+ "1183": 428612096.0,
+ "1184": 428612096.0,
+ "1185": 428612096.0,
+ "1186": 428612096.0,
+ "1187": 428612096.0,
+ "1188": 428612096.0,
+ "1189": 428612096.0,
+ "1190": 428612096.0,
+ "1191": 428612096.0,
+ "1192": 428612096.0,
+ "1193": 428612096.0,
+ "1194": 428612096.0,
+ "1195": 428612096.0,
+ "1196": 428612096.0,
+ "1197": 428612096.0,
+ "1198": 428612096.0,
+ "1199": 428612096.0,
+ "1200": 428612096.0,
+ "1201": 428612096.0,
+ "1202": 428612096.0,
+ "1203": 428612096.0,
+ "1204": 428612096.0,
+ "1205": 428612096.0,
+ "1206": 428612096.0,
+ "1207": 428612096.0,
+ "1208": 428612096.0,
+ "1209": 428612096.0,
+ "1210": 428612096.0,
+ "1211": 428612096.0,
+ "1212": 428612096.0,
+ "1213": 428612096.0,
+ "1214": 428612096.0,
+ "1215": 428612096.0,
+ "1216": 428612096.0,
+ "1217": 428612096.0,
+ "1218": 428612096.0,
+ "1219": 428612096.0,
+ "1220": 428612096.0,
+ "1221": 428612096.0,
+ "1222": 428612096.0,
+ "1223": 428612096.0,
+ "1224": 428612096.0,
+ "1225": 428612096.0,
+ "1226": 428612096.0,
+ "1227": 428612096.0,
+ "1228": 428612096.0,
+ "1229": 428612096.0,
+ "1230": 428612096.0,
+ "1231": 428612096.0,
+ "1232": 428612096.0,
+ "1233": 428612096.0,
+ "1234": 428612096.0,
+ "1235": 428612096.0,
+ "1236": 428612096.0,
+ "1237": 428612096.0,
+ "1238": 428612096.0,
+ "1239": 428612096.0,
+ "1240": 428612096.0,
+ "1241": 428612096.0,
+ "1242": 428612096.0,
+ "1243": 428612096.0,
+ "1244": 428612096.0,
+ "1245": 428612096.0,
+ "1246": 428612096.0,
+ "1247": 428612096.0,
+ "1248": 428612096.0,
+ "1249": 428612096.0,
+ "1250": 428612096.0,
+ "1251": 428612096.0,
+ "1252": 428612096.0,
+ "1253": 428612096.0,
+ "1254": 428612096.0,
+ "1255": 428612096.0,
+ "1256": 428612096.0,
+ "1257": 428612096.0,
+ "1258": 428612096.0,
+ "1259": 428612096.0,
+ "1260": 428612096.0,
+ "1261": 428612096.0,
+ "1262": 428612096.0,
+ "1263": 428612096.0,
+ "1264": 428612096.0,
+ "1265": 428612096.0,
+ "1266": 428612096.0,
+ "1267": 428612096.0,
+ "1268": 428612096.0,
+ "1269": 428612096.0,
+ "1270": 428612096.0,
+ "1271": 428612096.0,
+ "1272": 428612096.0,
+ "1273": 428612096.0,
+ "1274": 428612096.0,
+ "1275": 428612096.0,
+ "1276": 428612096.0,
+ "1277": 428612096.0,
+ "1278": 428612096.0,
+ "1279": 428612096.0,
+ "1280": 428612096.0,
+ "1281": 428612096.0,
+ "1282": 428612096.0,
+ "1283": 428612096.0,
+ "1284": 428612096.0,
+ "1285": 428612096.0,
+ "1286": 428612096.0,
+ "1287": 428612096.0,
+ "1288": 428612096.0,
+ "1289": 428612096.0,
+ "1290": 428612096.0,
+ "1291": 428612096.0,
+ "1292": 428612096.0,
+ "1293": 428612096.0,
+ "1294": 428612096.0,
+ "1295": 428612096.0,
+ "1296": 428612096.0,
+ "1297": 428612096.0,
+ "1298": 428612096.0,
+ "1299": 428612096.0,
+ "1300": 428612096.0,
+ "1301": 428612096.0,
+ "1302": 428612096.0,
+ "1303": 428612096.0,
+ "1304": 428612096.0,
+ "1305": 428612096.0,
+ "1306": 428612096.0,
+ "1307": 428612096.0,
+ "1308": 428612096.0,
+ "1309": 428612096.0,
+ "1310": 428612096.0,
+ "1311": 428612096.0,
+ "1312": 428612096.0,
+ "1313": 428612096.0,
+ "1314": 428612096.0,
+ "1315": 428612096.0,
+ "1316": 428612096.0,
+ "1317": 428612096.0,
+ "1318": 428612096.0,
+ "1319": 428612096.0,
+ "1320": 428612096.0,
+ "1321": 428612096.0,
+ "1322": 428612096.0,
+ "1323": 428612096.0,
+ "1324": 428612096.0,
+ "1325": 428612096.0,
+ "1326": 428612096.0,
+ "1327": 428612096.0,
+ "1328": 428612096.0,
+ "1329": 428612096.0,
+ "1330": 428612096.0,
+ "1331": 428612096.0,
+ "1332": 428612096.0,
+ "1333": 428612096.0,
+ "1334": 428612096.0,
+ "1335": 428612096.0,
+ "1336": 428612096.0,
+ "1337": 428612096.0,
+ "1338": 428612096.0,
+ "1339": 428612096.0,
+ "1340": 428612096.0,
+ "1341": 428612096.0,
+ "1342": 428612096.0,
+ "1343": 428612096.0,
+ "1344": 428612096.0,
+ "1345": 428612096.0,
+ "1346": 428612096.0,
+ "1347": 428612096.0,
+ "1348": 428612096.0,
+ "1349": 428612096.0,
+ "1350": 428612096.0,
+ "1351": 428612096.0,
+ "1352": 428612096.0,
+ "1353": 428612096.0,
+ "1354": 428612096.0,
+ "1355": 428612096.0,
+ "1356": 428612096.0,
+ "1357": 428612096.0,
+ "1358": 428612096.0,
+ "1359": 428612096.0,
+ "1360": 428612096.0,
+ "1361": 428612096.0,
+ "1362": 428612096.0,
+ "1363": 428612096.0,
+ "1364": 428612096.0,
+ "1365": 428612096.0,
+ "1366": 428612096.0,
+ "1367": 428612096.0,
+ "1368": 428612096.0,
+ "1369": 428612096.0,
+ "1370": 428612096.0,
+ "1371": 428612096.0,
+ "1372": 428612096.0,
+ "1373": 428612096.0,
+ "1374": 428612096.0,
+ "1375": 428612096.0,
+ "1376": 428612096.0,
+ "1377": 428612096.0,
+ "1378": 428612096.0,
+ "1379": 428612096.0,
+ "1380": 428612096.0,
+ "1381": 428612096.0,
+ "1382": 428612096.0,
+ "1383": 428612096.0,
+ "1384": 428612096.0,
+ "1385": 428612096.0,
+ "1386": 428612096.0,
+ "1387": 428612096.0,
+ "1388": 428612096.0,
+ "1389": 428612096.0,
+ "1390": 428612096.0,
+ "1391": 428612096.0,
+ "1392": 428612096.0,
+ "1393": 428612096.0,
+ "1394": 428612096.0,
+ "1395": 428612096.0,
+ "1396": 428612096.0,
+ "1397": 428612096.0,
+ "1398": 428612096.0,
+ "1399": 428612096.0,
+ "1400": 428612096.0,
+ "1401": 428612096.0,
+ "1402": 428612096.0,
+ "1403": 428612096.0,
+ "1404": 428612096.0,
+ "1405": 428612096.0,
+ "1406": 428612096.0,
+ "1407": 428612096.0,
+ "1408": 428612096.0,
+ "1409": 428612096.0,
+ "1410": 428612096.0,
+ "1411": 428612096.0,
+ "1412": 428612096.0,
+ "1413": 428612096.0,
+ "1414": 428612096.0,
+ "1415": 428612096.0,
+ "1416": 428612096.0,
+ "1417": 428612096.0,
+ "1418": 428612096.0,
+ "1419": 428612096.0,
+ "1420": 428612096.0,
+ "1421": 428612096.0,
+ "1422": 428612096.0,
+ "1423": 428612096.0,
+ "1424": 428612096.0,
+ "1425": 428612096.0,
+ "1426": 428612096.0,
+ "1427": 428612096.0,
+ "1428": 428612096.0,
+ "1429": 428612096.0,
+ "1430": 428612096.0,
+ "1431": 428612096.0,
+ "1432": 428612096.0,
+ "1433": 428612096.0,
+ "1434": 428612096.0,
+ "1435": 428612096.0,
+ "1436": 428612096.0,
+ "1437": 428612096.0,
+ "1438": 428612096.0,
+ "1439": 428612096.0,
+ "1440": 428612096.0,
+ "1441": 428612096.0,
+ "1442": 428612096.0,
+ "1443": 428612096.0,
+ "1444": 428612096.0,
+ "1445": 428612096.0,
+ "1446": 428612096.0,
+ "1447": 428612096.0,
+ "1448": 428612096.0,
+ "1449": 428612096.0,
+ "1450": 428612096.0,
+ "1451": 428612096.0,
+ "1452": 428612096.0,
+ "1453": 428612096.0,
+ "1454": 428612096.0,
+ "1455": 428612096.0,
+ "1456": 428612096.0,
+ "1457": 428612096.0,
+ "1458": 428612096.0,
+ "1459": 428612096.0,
+ "1460": 428612096.0,
+ "1461": 428612096.0,
+ "1462": 428612096.0,
+ "1463": 428612096.0,
+ "1464": 428612096.0,
+ "1465": 428612096.0,
+ "1466": 428612096.0,
+ "1467": 428612096.0,
+ "1468": 428612096.0,
+ "1469": 428612096.0,
+ "1470": 428612096.0,
+ "1471": 428612096.0,
+ "1472": 428612096.0,
+ "1473": 428612096.0,
+ "1474": 428612096.0,
+ "1475": 428612096.0,
+ "1476": 428612096.0,
+ "1477": 428612096.0,
+ "1478": 428612096.0,
+ "1479": 428612096.0,
+ "1480": 428612096.0,
+ "1481": 428612096.0,
+ "1482": 428612096.0,
+ "1483": 428612096.0,
+ "1484": 428612096.0,
+ "1485": 428612096.0,
+ "1486": 428612096.0,
+ "1487": 428612096.0,
+ "1488": 428612096.0,
+ "1489": 428612096.0,
+ "1490": 428612096.0,
+ "1491": 428612096.0,
+ "1492": 428612096.0,
+ "1493": 428612096.0,
+ "1494": 428612096.0,
+ "1495": 428612096.0,
+ "1496": 428612096.0,
+ "1497": 428612096.0,
+ "1498": 428612096.0,
+ "1499": 428612096.0,
+ "1500": 428612096.0,
+ "1501": 428612096.0,
+ "1502": 428612096.0,
+ "1503": 428612096.0,
+ "1504": 428612096.0,
+ "1505": 428612096.0,
+ "1506": 428612096.0,
+ "1507": 428612096.0,
+ "1508": 428612096.0,
+ "1509": 428612096.0,
+ "1510": 428612096.0,
+ "1511": 428612096.0,
+ "1512": 428612096.0,
+ "1513": 428612096.0,
+ "1514": 428612096.0,
+ "1515": 428612096.0,
+ "1516": 428612096.0,
+ "1517": 428612096.0,
+ "1518": 428612096.0,
+ "1519": 428612096.0,
+ "1520": 428612096.0,
+ "1521": 428612096.0,
+ "1522": 428612096.0,
+ "1523": 428612096.0,
+ "1524": 428612096.0,
+ "1525": 428612096.0,
+ "1526": 428612096.0,
+ "1527": 428612096.0,
+ "1528": 428612096.0,
+ "1529": 428612096.0,
+ "1530": 428612096.0,
+ "1531": 428612096.0,
+ "1532": 428612096.0,
+ "1533": 428612096.0,
+ "1534": 428612096.0,
+ "1535": 428612096.0,
+ "1536": 428612096.0,
+ "1537": 428612096.0,
+ "1538": 428612096.0,
+ "1539": 428612096.0,
+ "1540": 428612096.0,
+ "1541": 428612096.0,
+ "1542": 428612096.0,
+ "1543": 428612096.0,
+ "1544": 428612096.0,
+ "1545": 428612096.0,
+ "1546": 428612096.0,
+ "1547": 428612096.0,
+ "1548": 428612096.0,
+ "1549": 428612096.0,
+ "1550": 428612096.0,
+ "1551": 428612096.0,
+ "1552": 428612096.0,
+ "1553": 428612096.0,
+ "1554": 428612096.0,
+ "1555": 428612096.0,
+ "1556": 428612096.0,
+ "1557": 428612096.0,
+ "1558": 428612096.0,
+ "1559": 428612096.0,
+ "1560": 428612096.0,
+ "1561": 428612096.0,
+ "1562": 428612096.0,
+ "1563": 428612096.0,
+ "1564": 428612096.0,
+ "1565": 428612096.0,
+ "1566": 428612096.0,
+ "1567": 428612096.0,
+ "1568": 428612096.0,
+ "1569": 428612096.0,
+ "1570": 428612096.0,
+ "1571": 428612096.0,
+ "1572": 428612096.0,
+ "1573": 428612096.0,
+ "1574": 428612096.0,
+ "1575": 428612096.0,
+ "1576": 428612096.0,
+ "1577": 428612096.0,
+ "1578": 428612096.0,
+ "1579": 428612096.0,
+ "1580": 428612096.0,
+ "1581": 428612096.0,
+ "1582": 428612096.0,
+ "1583": 428612096.0,
+ "1584": 428612096.0,
+ "1585": 428612096.0,
+ "1586": 428612096.0,
+ "1587": 428612096.0,
+ "1588": 428612096.0,
+ "1589": 428612096.0,
+ "1590": 428612096.0,
+ "1591": 428612096.0,
+ "1592": 428612096.0,
+ "1593": 428612096.0,
+ "1594": 428612096.0,
+ "1595": 428612096.0,
+ "1596": 428612096.0,
+ "1597": 428612096.0,
+ "1598": 428612096.0,
+ "1599": 428612096.0,
+ "1600": 428612096.0,
+ "1601": 428612096.0,
+ "1602": 428612096.0,
+ "1603": 428612096.0,
+ "1604": 428612096.0,
+ "1605": 428612096.0,
+ "1606": 428612096.0,
+ "1607": 428612096.0,
+ "1608": 428612096.0,
+ "1609": 428612096.0,
+ "1610": 428612096.0,
+ "1611": 428612096.0,
+ "1612": 428612096.0,
+ "1613": 428612096.0,
+ "1614": 428612096.0,
+ "1615": 428612096.0,
+ "1616": 428612096.0,
+ "1617": 428612096.0,
+ "1618": 428612096.0,
+ "1619": 428612096.0,
+ "1620": 428612096.0,
+ "1621": 428612096.0,
+ "1622": 428612096.0,
+ "1623": 428612096.0,
+ "1624": 428612096.0,
+ "1625": 428612096.0,
+ "1626": 428612096.0,
+ "1627": 428612096.0,
+ "1628": 428612096.0,
+ "1629": 428612096.0,
+ "1630": 428612096.0,
+ "1631": 428612096.0,
+ "1632": 428612096.0,
+ "1633": 428612096.0,
+ "1634": 428612096.0,
+ "1635": 428612096.0,
+ "1636": 428612096.0,
+ "1637": 428612096.0,
+ "1638": 428612096.0,
+ "1639": 428612096.0,
+ "1640": 428612096.0,
+ "1641": 428612096.0,
+ "1642": 428612096.0,
+ "1643": 428612096.0,
+ "1644": 428612096.0,
+ "1645": 428612096.0,
+ "1646": 428612096.0,
+ "1647": 428612096.0,
+ "1648": 428612096.0,
+ "1649": 428612096.0,
+ "1650": 428612096.0,
+ "1651": 428612096.0,
+ "1652": 428612096.0,
+ "1653": 428612096.0,
+ "1654": 428612096.0,
+ "1655": 428612096.0,
+ "1656": 428612096.0,
+ "1657": 428612096.0,
+ "1658": 428612096.0,
+ "1659": 428612096.0,
+ "1660": 428612096.0,
+ "1661": 428612096.0,
+ "1662": 428612096.0,
+ "1663": 428612096.0,
+ "1664": 428612096.0,
+ "1665": 428612096.0,
+ "1666": 428612096.0,
+ "1667": 428612096.0,
+ "1668": 428612096.0,
+ "1669": 428612096.0,
+ "1670": 428612096.0,
+ "1671": 428612096.0,
+ "1672": 428612096.0,
+ "1673": 428612096.0,
+ "1674": 428612096.0,
+ "1675": 428612096.0,
+ "1676": 428612096.0,
+ "1677": 428612096.0,
+ "1678": 428612096.0,
+ "1679": 428612096.0,
+ "1680": 428612096.0,
+ "1681": 428612096.0,
+ "1682": 428612096.0,
+ "1683": 428612096.0,
+ "1684": 428612096.0,
+ "1685": 428612096.0,
+ "1686": 428612096.0,
+ "1687": 428612096.0,
+ "1688": 428612096.0,
+ "1689": 428612096.0,
+ "1690": 428612096.0,
+ "1691": 428612096.0,
+ "1692": 428612096.0,
+ "1693": 428612096.0,
+ "1694": 428612096.0,
+ "1695": 428612096.0,
+ "1696": 428612096.0,
+ "1697": 428612096.0,
+ "1698": 428612096.0,
+ "1699": 428612096.0,
+ "1700": 428612096.0,
+ "1701": 428612096.0,
+ "1702": 428612096.0,
+ "1703": 428612096.0,
+ "1704": 428612096.0,
+ "1705": 428612096.0,
+ "1706": 428612096.0,
+ "1707": 428612096.0,
+ "1708": 428612096.0,
+ "1709": 428612096.0,
+ "1710": 428612096.0,
+ "1711": 428612096.0,
+ "1712": 428612096.0,
+ "1713": 428612096.0,
+ "1714": 428612096.0,
+ "1715": 428612096.0,
+ "1716": 428612096.0,
+ "1717": 428612096.0,
+ "1718": 428612096.0,
+ "1719": 428612096.0,
+ "1720": 428612096.0,
+ "1721": 428612096.0,
+ "1722": 428612096.0,
+ "1723": 428612096.0,
+ "1724": 428612096.0,
+ "1725": 428612096.0,
+ "1726": 428612096.0,
+ "1727": 428612096.0,
+ "1728": 428612096.0,
+ "1729": 428612096.0,
+ "1730": 428612096.0,
+ "1731": 428612096.0,
+ "1732": 428612096.0,
+ "1733": 428612096.0,
+ "1734": 428612096.0,
+ "1735": 428612096.0,
+ "1736": 428612096.0,
+ "1737": 428612096.0,
+ "1738": 428612096.0,
+ "1739": 428612096.0,
+ "1740": 428612096.0,
+ "1741": 428612096.0,
+ "1742": 428612096.0,
+ "1743": 428612096.0,
+ "1744": 428612096.0,
+ "1745": 428612096.0,
+ "1746": 428612096.0,
+ "1747": 428612096.0,
+ "1748": 428612096.0,
+ "1749": 428612096.0,
+ "1750": 428612096.0,
+ "1751": 428612096.0,
+ "1752": 428612096.0,
+ "1753": 428612096.0,
+ "1754": 428612096.0,
+ "1755": 428612096.0,
+ "1756": 428612096.0,
+ "1757": 428612096.0,
+ "1758": 428612096.0,
+ "1759": 428612096.0,
+ "1760": 428612096.0,
+ "1761": 428612096.0,
+ "1762": 428612096.0,
+ "1763": 428612096.0,
+ "1764": 428612096.0,
+ "1765": 428612096.0,
+ "1766": 428612096.0,
+ "1767": 428612096.0,
+ "1768": 428612096.0,
+ "1769": 428612096.0,
+ "1770": 428612096.0,
+ "1771": 428612096.0,
+ "1772": 428612096.0,
+ "1773": 428612096.0,
+ "1774": 428612096.0,
+ "1775": 428612096.0,
+ "1776": 428612096.0,
+ "1777": 428612096.0,
+ "1778": 428612096.0,
+ "1779": 428612096.0,
+ "1780": 428612096.0,
+ "1781": 428612096.0,
+ "1782": 428612096.0,
+ "1783": 428612096.0,
+ "1784": 428612096.0,
+ "1785": 428612096.0,
+ "1786": 428612096.0,
+ "1787": 428612096.0,
+ "1788": 428612096.0,
+ "1789": 428612096.0,
+ "1790": 428612096.0,
+ "1791": 428612096.0,
+ "1792": 428612096.0,
+ "1793": 428612096.0,
+ "1794": 428612096.0,
+ "1795": 428612096.0,
+ "1796": 428612096.0,
+ "1797": 428612096.0,
+ "1798": 428612096.0,
+ "1799": 428612096.0,
+ "1800": 428612096.0,
+ "1801": 428612096.0,
+ "1802": 428612096.0,
+ "1803": 428612096.0,
+ "1804": 428612096.0,
+ "1805": 428612096.0,
+ "1806": 428612096.0,
+ "1807": 428612096.0,
+ "1808": 428612096.0,
+ "1809": 428612096.0,
+ "1810": 428612096.0,
+ "1811": 428612096.0,
+ "1812": 428612096.0,
+ "1813": 428612096.0,
+ "1814": 428612096.0,
+ "1815": 428612096.0,
+ "1816": 428612096.0,
+ "1817": 428612096.0,
+ "1818": 428612096.0,
+ "1819": 428612096.0,
+ "1820": 428612096.0,
+ "1821": 428612096.0,
+ "1822": 428612096.0,
+ "1823": 428612096.0,
+ "1824": 428612096.0,
+ "1825": 428612096.0,
+ "1826": 428612096.0,
+ "1827": 428612096.0,
+ "1828": 428612096.0,
+ "1829": 428612096.0,
+ "1830": 428612096.0,
+ "1831": 428612096.0,
+ "1832": 428612096.0,
+ "1833": 428612096.0,
+ "1834": 428612096.0,
+ "1835": 428612096.0,
+ "1836": 428612096.0,
+ "1837": 428612096.0,
+ "1838": 428612096.0,
+ "1839": 428612096.0,
+ "1840": 428612096.0,
+ "1841": 428612096.0,
+ "1842": 428612096.0,
+ "1843": 428612096.0,
+ "1844": 428612096.0,
+ "1845": 428612096.0,
+ "1846": 428612096.0,
+ "1847": 428612096.0,
+ "1848": 428612096.0,
+ "1849": 428612096.0,
+ "1850": 428612096.0,
+ "1851": 428612096.0,
+ "1852": 428612096.0,
+ "1853": 428612096.0,
+ "1854": 428612096.0,
+ "1855": 428612096.0,
+ "1856": 428612096.0,
+ "1857": 428612096.0,
+ "1858": 428612096.0,
+ "1859": 428612096.0,
+ "1860": 428612096.0,
+ "1861": 428612096.0,
+ "1862": 428612096.0,
+ "1863": 428612096.0,
+ "1864": 428612096.0,
+ "1865": 428612096.0,
+ "1866": 428612096.0,
+ "1867": 428612096.0,
+ "1868": 428612096.0,
+ "1869": 428612096.0,
+ "1870": 428612096.0,
+ "1871": 428612096.0,
+ "1872": 428612096.0,
+ "1873": 428612096.0,
+ "1874": 428612096.0,
+ "1875": 428612096.0,
+ "1876": 428612096.0,
+ "1877": 428612096.0,
+ "1878": 428612096.0,
+ "1879": 428612096.0,
+ "1880": 428612096.0,
+ "1881": 428612096.0,
+ "1882": 428612096.0,
+ "1883": 428612096.0,
+ "1884": 428612096.0,
+ "1885": 428612096.0,
+ "1886": 428612096.0,
+ "1887": 428612096.0,
+ "1888": 428612096.0,
+ "1889": 428612096.0,
+ "1890": 428612096.0,
+ "1891": 428612096.0,
+ "1892": 428612096.0,
+ "1893": 428612096.0,
+ "1894": 428612096.0,
+ "1895": 428612096.0,
+ "1896": 428612096.0,
+ "1897": 428612096.0,
+ "1898": 428612096.0,
+ "1899": 428612096.0,
+ "1900": 428612096.0,
+ "1901": 428612096.0,
+ "1902": 428612096.0,
+ "1903": 428612096.0,
+ "1904": 428612096.0,
+ "1905": 428612096.0,
+ "1906": 428612096.0,
+ "1907": 428612096.0,
+ "1908": 428612096.0,
+ "1909": 428612096.0,
+ "1910": 428612096.0,
+ "1911": 428612096.0,
+ "1912": 428612096.0,
+ "1913": 428612096.0,
+ "1914": 428612096.0,
+ "1915": 428612096.0,
+ "1916": 428612096.0,
+ "1917": 428612096.0,
+ "1918": 428612096.0,
+ "1919": 428612096.0,
+ "1920": 428612096.0,
+ "1921": 428612096.0,
+ "1922": 428612096.0,
+ "1923": 428612096.0,
+ "1924": 428612096.0,
+ "1925": 428612096.0,
+ "1926": 428612096.0,
+ "1927": 428612096.0,
+ "1928": 428612096.0,
+ "1929": 428612096.0,
+ "1930": 428612096.0,
+ "1931": 428612096.0,
+ "1932": 428612096.0,
+ "1933": 428612096.0,
+ "1934": 428612096.0,
+ "1935": 428612096.0,
+ "1936": 428612096.0,
+ "1937": 428612096.0,
+ "1938": 428612096.0,
+ "1939": 428612096.0,
+ "1940": 428612096.0,
+ "1941": 428612096.0,
+ "1942": 428612096.0,
+ "1943": 428612096.0,
+ "1944": 428612096.0,
+ "1945": 428612096.0,
+ "1946": 428612096.0,
+ "1947": 428612096.0,
+ "1948": 428612096.0,
+ "1949": 428612096.0,
+ "1950": 428612096.0,
+ "1951": 428612096.0,
+ "1952": 428612096.0,
+ "1953": 428612096.0,
+ "1954": 428612096.0,
+ "1955": 428612096.0,
+ "1956": 428612096.0,
+ "1957": 428612096.0,
+ "1958": 428612096.0,
+ "1959": 428612096.0,
+ "1960": 428612096.0,
+ "1961": 428612096.0,
+ "1962": 428612096.0,
+ "1963": 428612096.0,
+ "1964": 428612096.0,
+ "1965": 428612096.0,
+ "1966": 428612096.0,
+ "1967": 428612096.0,
+ "1968": 428612096.0,
+ "1969": 428612096.0,
+ "1970": 428612096.0,
+ "1971": 428612096.0,
+ "1972": 428612096.0,
+ "1973": 428612096.0,
+ "1974": 428612096.0,
+ "1975": 428612096.0,
+ "1976": 428612096.0,
+ "1977": 428612096.0,
+ "1978": 428612096.0,
+ "1979": 428612096.0,
+ "1980": 428612096.0,
+ "1981": 428612096.0,
+ "1982": 428612096.0,
+ "1983": 428612096.0,
+ "1984": 428612096.0,
+ "1985": 428612096.0,
+ "1986": 428612096.0,
+ "1987": 428612096.0,
+ "1988": 428612096.0,
+ "1989": 428612096.0,
+ "1990": 428612096.0,
+ "1991": 428612096.0,
+ "1992": 428612096.0,
+ "1993": 428612096.0,
+ "1994": 428612096.0,
+ "1995": 428612096.0,
+ "1996": 428612096.0,
+ "1997": 428612096.0,
+ "1998": 428612096.0,
+ "1999": 428612096.0,
+ "2000": 428612096.0
+ }
+ },
+ "iteration-time": {
+ "start_step": 1,
+ "end_step": 2000,
+ "step_interval": 1,
+ "values": {
+ "1": 22.43653,
+ "2": 5.05,
+ "3": 4.99632,
+ "4": 5.00941,
+ "5": 5.30047,
+ "6": 5.00529,
+ "7": 4.98693,
+ "8": 5.03236,
+ "9": 5.04733,
+ "10": 5.0355,
+ "11": 5.05504,
+ "12": 5.02789,
+ "13": 5.05026,
+ "14": 5.03817,
+ "15": 5.03065,
+ "16": 5.04414,
+ "17": 5.00251,
+ "18": 4.9928,
+ "19": 4.99792,
+ "20": 4.99648,
+ "21": 5.01668,
+ "22": 4.97973,
+ "23": 5.06379,
+ "24": 5.01631,
+ "25": 4.96187,
+ "26": 4.95004,
+ "27": 4.95649,
+ "28": 4.93702,
+ "29": 4.93675,
+ "30": 4.92101,
+ "31": 4.93325,
+ "32": 4.92626,
+ "33": 4.93256,
+ "34": 4.93518,
+ "35": 4.95011,
+ "36": 4.959,
+ "37": 5.41549,
+ "38": 5.7108,
+ "39": 4.96475,
+ "40": 4.95756,
+ "41": 5.03533,
+ "42": 4.94591,
+ "43": 5.30856,
+ "44": 4.93166,
+ "45": 5.29533,
+ "46": 6.02838,
+ "47": 4.99271,
+ "48": 4.93548,
+ "49": 4.93262,
+ "50": 4.93589,
+ "51": 4.93457,
+ "52": 4.9402,
+ "53": 4.93593,
+ "54": 4.93266,
+ "55": 4.93457,
+ "56": 4.926,
+ "57": 4.94015,
+ "58": 4.93606,
+ "59": 4.92819,
+ "60": 4.92679,
+ "61": 4.92853,
+ "62": 4.93744,
+ "63": 4.93014,
+ "64": 4.92895,
+ "65": 4.92774,
+ "66": 4.9263,
+ "67": 4.92483,
+ "68": 4.91654,
+ "69": 4.95386,
+ "70": 4.95969,
+ "71": 4.97371,
+ "72": 4.96736,
+ "73": 4.98575,
+ "74": 4.968,
+ "75": 5.68071,
+ "76": 4.98487,
+ "77": 4.98651,
+ "78": 4.97441,
+ "79": 4.97854,
+ "80": 4.97886,
+ "81": 4.98163,
+ "82": 4.97647,
+ "83": 5.33849,
+ "84": 4.98394,
+ "85": 4.98,
+ "86": 4.96888,
+ "87": 4.9685,
+ "88": 5.33167,
+ "89": 5.40565,
+ "90": 4.97724,
+ "91": 6.05451,
+ "92": 4.9699,
+ "93": 4.96947,
+ "94": 4.97853,
+ "95": 5.03234,
+ "96": 4.9703,
+ "97": 4.9766,
+ "98": 4.96386,
+ "99": 4.97968,
+ "100": 4.96583,
+ "101": 4.956,
+ "102": 4.94425,
+ "103": 4.96789,
+ "104": 4.96252,
+ "105": 4.97853,
+ "106": 4.98313,
+ "107": 4.98,
+ "108": 4.97528,
+ "109": 4.98226,
+ "110": 4.98532,
+ "111": 4.95791,
+ "112": 4.95409,
+ "113": 5.66529,
+ "114": 4.96347,
+ "115": 4.99625,
+ "116": 4.99199,
+ "117": 4.98823,
+ "118": 4.98114,
+ "119": 4.97652,
+ "120": 4.98449,
+ "121": 4.98578,
+ "122": 4.98423,
+ "123": 4.9824,
+ "124": 4.98111,
+ "125": 4.98291,
+ "126": 4.98215,
+ "127": 4.98484,
+ "128": 5.35151,
+ "129": 4.9912,
+ "130": 4.99188,
+ "131": 4.98662,
+ "132": 5.34041,
+ "133": 4.98063,
+ "134": 5.33235,
+ "135": 5.69907,
+ "136": 5.33587,
+ "137": 4.98509,
+ "138": 5.61624,
+ "139": 5.23864,
+ "140": 4.92839,
+ "141": 4.95868,
+ "142": 4.93611,
+ "143": 4.9473,
+ "144": 4.9282,
+ "145": 4.93563,
+ "146": 4.92822,
+ "147": 4.94205,
+ "148": 4.94037,
+ "149": 4.93429,
+ "150": 5.62642,
+ "151": 4.93794,
+ "152": 4.9323,
+ "153": 4.93391,
+ "154": 4.93581,
+ "155": 4.93177,
+ "156": 4.93719,
+ "157": 4.93775,
+ "158": 4.93223,
+ "159": 4.9449,
+ "160": 4.93898,
+ "161": 4.94198,
+ "162": 4.9436,
+ "163": 4.9355,
+ "164": 4.93432,
+ "165": 4.9382,
+ "166": 4.94332,
+ "167": 4.93425,
+ "168": 4.93189,
+ "169": 4.92717,
+ "170": 4.94393,
+ "171": 4.94517,
+ "172": 4.92976,
+ "173": 5.303,
+ "174": 4.92818,
+ "175": 4.92924,
+ "176": 4.9385,
+ "177": 5.27801,
+ "178": 4.93182,
+ "179": 5.28092,
+ "180": 5.99722,
+ "181": 4.92656,
+ "182": 4.92594,
+ "183": 4.92947,
+ "184": 4.93087,
+ "185": 4.92967,
+ "186": 4.93088,
+ "187": 5.62908,
+ "188": 4.93498,
+ "189": 4.9476,
+ "190": 4.93843,
+ "191": 4.94101,
+ "192": 4.93265,
+ "193": 4.93046,
+ "194": 4.93133,
+ "195": 4.94044,
+ "196": 4.93997,
+ "197": 4.93336,
+ "198": 6.32096,
+ "199": 4.95042,
+ "200": 4.91888,
+ "201": 4.91803,
+ "202": 4.92212,
+ "203": 4.91738,
+ "204": 4.93431,
+ "205": 4.93078,
+ "206": 4.9288,
+ "207": 4.9431,
+ "208": 4.93288,
+ "209": 4.93152,
+ "210": 4.92297,
+ "211": 4.92152,
+ "212": 4.92078,
+ "213": 4.93382,
+ "214": 4.92203,
+ "215": 4.92628,
+ "216": 4.92759,
+ "217": 4.91972,
+ "218": 4.93018,
+ "219": 5.30587,
+ "220": 4.92639,
+ "221": 4.92815,
+ "222": 5.28345,
+ "223": 4.93513,
+ "224": 5.62954,
+ "225": 6.35198,
+ "226": 4.94108,
+ "227": 4.94033,
+ "228": 4.94077,
+ "229": 4.9445,
+ "230": 4.95277,
+ "231": 4.93684,
+ "232": 4.94258,
+ "233": 4.9386,
+ "234": 4.94149,
+ "235": 4.94872,
+ "236": 4.95361,
+ "237": 4.94924,
+ "238": 4.93722,
+ "239": 4.94342,
+ "240": 4.95029,
+ "241": 4.94512,
+ "242": 4.9423,
+ "243": 4.93861,
+ "244": 4.93578,
+ "245": 4.93502,
+ "246": 4.94519,
+ "247": 4.93658,
+ "248": 4.93761,
+ "249": 4.94583,
+ "250": 4.94414,
+ "251": 4.94331,
+ "252": 4.94044,
+ "253": 4.94317,
+ "254": 4.94161,
+ "255": 4.95295,
+ "256": 4.95044,
+ "257": 4.94816,
+ "258": 4.94006,
+ "259": 4.94409,
+ "260": 4.9408,
+ "261": 4.94791,
+ "262": 5.63079,
+ "263": 4.95361,
+ "264": 5.3219,
+ "265": 4.96046,
+ "266": 4.95564,
+ "267": 5.30372,
+ "268": 5.30618,
+ "269": 4.94954,
+ "270": 6.01622,
+ "271": 4.9509,
+ "272": 4.9579,
+ "273": 4.9529,
+ "274": 4.95339,
+ "275": 4.94721,
+ "276": 4.95053,
+ "277": 4.9434,
+ "278": 4.9389,
+ "279": 4.94021,
+ "280": 4.93862,
+ "281": 4.93834,
+ "282": 4.93985,
+ "283": 4.94183,
+ "284": 4.93716,
+ "285": 4.9443,
+ "286": 4.94305,
+ "287": 4.93467,
+ "288": 4.93816,
+ "289": 4.93749,
+ "290": 4.9349,
+ "291": 4.939,
+ "292": 4.93482,
+ "293": 4.94665,
+ "294": 4.93648,
+ "295": 4.93823,
+ "296": 4.93522,
+ "297": 4.93472,
+ "298": 4.93288,
+ "299": 5.61551,
+ "300": 4.95418,
+ "301": 4.95347,
+ "302": 4.95005,
+ "303": 4.95224,
+ "304": 5.01672,
+ "305": 4.94451,
+ "306": 4.9469,
+ "307": 4.94674,
+ "308": 4.95506,
+ "309": 5.3147,
+ "310": 4.97913,
+ "311": 5.29357,
+ "312": 4.94239,
+ "313": 5.28356,
+ "314": 5.66502,
+ "315": 5.29945,
+ "316": 4.94213,
+ "317": 4.93439,
+ "318": 4.94085,
+ "319": 4.93452,
+ "320": 4.94083,
+ "321": 4.93407,
+ "322": 4.93596,
+ "323": 4.9411,
+ "324": 4.94091,
+ "325": 4.93723,
+ "326": 4.93682,
+ "327": 4.93712,
+ "328": 4.99643,
+ "329": 4.94011,
+ "330": 4.93777,
+ "331": 4.93553,
+ "332": 4.938,
+ "333": 4.94101,
+ "334": 4.93199,
+ "335": 4.93179,
+ "336": 5.28612,
+ "337": 5.30266,
+ "338": 4.96477,
+ "339": 4.97585,
+ "340": 4.95959,
+ "341": 4.95912,
+ "342": 4.96594,
+ "343": 4.96105,
+ "344": 4.96501,
+ "345": 4.96175,
+ "346": 4.96452,
+ "347": 4.9603,
+ "348": 4.95434,
+ "349": 4.95658,
+ "350": 4.95773,
+ "351": 4.96723,
+ "352": 5.02353,
+ "353": 4.95487,
+ "354": 5.32227,
+ "355": 4.95601,
+ "356": 5.29598,
+ "357": 4.95819,
+ "358": 5.29935,
+ "359": 6.01593,
+ "360": 4.96832,
+ "361": 4.95302,
+ "362": 4.95944,
+ "363": 4.95167,
+ "364": 4.9483,
+ "365": 4.94951,
+ "366": 4.9525,
+ "367": 4.95364,
+ "368": 4.94948,
+ "369": 4.95258,
+ "370": 4.94974,
+ "371": 4.96357,
+ "372": 4.94701,
+ "373": 4.94584,
+ "374": 5.27688,
+ "375": 5.29329,
+ "376": 4.93553,
+ "377": 4.93296,
+ "378": 4.93431,
+ "379": 4.94158,
+ "380": 4.98441,
+ "381": 4.99657,
+ "382": 4.97634,
+ "383": 4.98015,
+ "384": 4.98178,
+ "385": 4.97595,
+ "386": 4.97431,
+ "387": 4.97965,
+ "388": 4.91884,
+ "389": 4.92436,
+ "390": 4.9179,
+ "391": 4.91999,
+ "392": 4.92113,
+ "393": 4.92231,
+ "394": 4.91815,
+ "395": 4.92381,
+ "396": 4.91848,
+ "397": 4.92412,
+ "398": 4.91541,
+ "399": 4.91455,
+ "400": 5.29982,
+ "401": 5.26416,
+ "402": 5.2612,
+ "403": 4.91795,
+ "404": 5.63316,
+ "405": 5.27153,
+ "406": 4.90744,
+ "407": 4.9142,
+ "408": 4.90831,
+ "409": 4.90838,
+ "410": 4.92063,
+ "411": 5.25377,
+ "412": 5.26322,
+ "413": 4.91895,
+ "414": 4.92378,
+ "415": 4.91866,
+ "416": 4.91955,
+ "417": 4.92152,
+ "418": 4.91929,
+ "419": 4.9201,
+ "420": 4.91526,
+ "421": 4.91974,
+ "422": 4.92503,
+ "423": 4.92579,
+ "424": 4.91791,
+ "425": 4.92253,
+ "426": 4.92114,
+ "427": 4.91774,
+ "428": 4.91171,
+ "429": 4.9125,
+ "430": 4.91411,
+ "431": 4.90802,
+ "432": 4.9164,
+ "433": 4.90723,
+ "434": 4.92382,
+ "435": 4.9069,
+ "436": 4.91154,
+ "437": 4.90512,
+ "438": 4.9175,
+ "439": 4.91782,
+ "440": 4.91028,
+ "441": 4.91048,
+ "442": 4.90894,
+ "443": 4.88817,
+ "444": 4.88126,
+ "445": 5.24853,
+ "446": 4.87836,
+ "447": 5.24263,
+ "448": 5.25398,
+ "449": 6.28763,
+ "450": 4.88338,
+ "451": 4.89491,
+ "452": 4.88709,
+ "453": 4.89008,
+ "454": 4.90322,
+ "455": 4.90113,
+ "456": 4.90439,
+ "457": 4.90223,
+ "458": 4.90641,
+ "459": 4.90851,
+ "460": 4.9009,
+ "461": 4.89968,
+ "462": 4.89662,
+ "463": 4.9081,
+ "464": 4.88866,
+ "465": 4.90253,
+ "466": 4.90724,
+ "467": 4.89875,
+ "468": 4.90067,
+ "469": 4.90495,
+ "470": 4.89887,
+ "471": 4.89965,
+ "472": 4.90145,
+ "473": 4.88549,
+ "474": 4.87833,
+ "475": 4.88274,
+ "476": 4.87937,
+ "477": 4.88019,
+ "478": 4.87808,
+ "479": 4.88269,
+ "480": 4.87591,
+ "481": 4.88072,
+ "482": 4.87452,
+ "483": 4.8839,
+ "484": 4.87834,
+ "485": 5.21963,
+ "486": 4.8887,
+ "487": 5.22473,
+ "488": 4.88748,
+ "489": 4.89663,
+ "490": 5.6108,
+ "491": 5.24875,
+ "492": 4.88583,
+ "493": 5.24488,
+ "494": 5.59516,
+ "495": 4.89058,
+ "496": 4.91601,
+ "497": 4.88752,
+ "498": 4.88645,
+ "499": 4.89008,
+ "500": 4.89271,
+ "501": 4.8913,
+ "502": 4.89039,
+ "503": 4.8906,
+ "504": 4.88603,
+ "505": 4.92691,
+ "506": 4.91793,
+ "507": 4.92158,
+ "508": 4.91981,
+ "509": 4.92795,
+ "510": 4.91413,
+ "511": 4.91073,
+ "512": 4.90909,
+ "513": 4.91434,
+ "514": 4.91509,
+ "515": 4.91002,
+ "516": 4.9115,
+ "517": 4.91722,
+ "518": 4.91514,
+ "519": 4.91283,
+ "520": 4.91403,
+ "521": 4.91077,
+ "522": 4.91167,
+ "523": 5.26088,
+ "524": 5.27803,
+ "525": 4.92516,
+ "526": 4.93143,
+ "527": 4.9217,
+ "528": 4.92344,
+ "529": 4.91786,
+ "530": 4.9193,
+ "531": 4.881,
+ "532": 4.87697,
+ "533": 4.88329,
+ "534": 5.23628,
+ "535": 5.26149,
+ "536": 4.88132,
+ "537": 5.23366,
+ "538": 5.92272,
+ "539": 4.8822,
+ "540": 4.87645,
+ "541": 4.87941,
+ "542": 4.8726,
+ "543": 4.87977,
+ "544": 4.88572,
+ "545": 4.97915,
+ "546": 4.94014,
+ "547": 4.9447,
+ "548": 4.94585,
+ "549": 4.93712,
+ "550": 4.95428,
+ "551": 4.9405,
+ "552": 4.94013,
+ "553": 4.94514,
+ "554": 4.94542,
+ "555": 4.94729,
+ "556": 4.93818,
+ "557": 4.94632,
+ "558": 4.95928,
+ "559": 4.94439,
+ "560": 5.29538,
+ "561": 5.29912,
+ "562": 4.95591,
+ "563": 4.94545,
+ "564": 4.9589,
+ "565": 4.9486,
+ "566": 4.94487,
+ "567": 4.94563,
+ "568": 4.96795,
+ "569": 4.96332,
+ "570": 4.95731,
+ "571": 4.95751,
+ "572": 4.94401,
+ "573": 4.94623,
+ "574": 4.9438,
+ "575": 4.9342,
+ "576": 4.93847,
+ "577": 4.94215,
+ "578": 4.94036,
+ "579": 4.95135,
+ "580": 5.28996,
+ "581": 5.66625,
+ "582": 4.93892,
+ "583": 5.64719,
+ "584": 5.28091,
+ "585": 4.95827,
+ "586": 4.95725,
+ "587": 4.96107,
+ "588": 4.95092,
+ "589": 4.95514,
+ "590": 4.94845,
+ "591": 4.94342,
+ "592": 4.9488,
+ "593": 4.93576,
+ "594": 4.93657,
+ "595": 4.93545,
+ "596": 4.93595,
+ "597": 5.29319,
+ "598": 5.28921,
+ "599": 4.95347,
+ "600": 4.94896,
+ "601": 4.94543,
+ "602": 4.95405,
+ "603": 4.94996,
+ "604": 4.94726,
+ "605": 4.94394,
+ "606": 4.9443,
+ "607": 4.99448,
+ "608": 4.93032,
+ "609": 4.96191,
+ "610": 4.95086,
+ "611": 4.94486,
+ "612": 4.94403,
+ "613": 4.94194,
+ "614": 4.94624,
+ "615": 4.94461,
+ "616": 4.96458,
+ "617": 4.94658,
+ "618": 4.94254,
+ "619": 4.93901,
+ "620": 4.94138,
+ "621": 4.94747,
+ "622": 4.95796,
+ "623": 4.94579,
+ "624": 5.30372,
+ "625": 4.94082,
+ "626": 5.66834,
+ "627": 4.93994,
+ "628": 5.97473,
+ "629": 4.94152,
+ "630": 4.94328,
+ "631": 4.9385,
+ "632": 4.9688,
+ "633": 4.93837,
+ "634": 5.25732,
+ "635": 4.9147,
+ "636": 5.25839,
+ "637": 4.92259,
+ "638": 4.91081,
+ "639": 4.92229,
+ "640": 4.92687,
+ "641": 4.91335,
+ "642": 4.91557,
+ "643": 4.91922,
+ "644": 4.91847,
+ "645": 4.92121,
+ "646": 4.92251,
+ "647": 4.91255,
+ "648": 4.91291,
+ "649": 4.91003,
+ "650": 4.90867,
+ "651": 4.91235,
+ "652": 4.90719,
+ "653": 4.90865,
+ "654": 4.90719,
+ "655": 4.91306,
+ "656": 4.90861,
+ "657": 4.90901,
+ "658": 4.91095,
+ "659": 4.90726,
+ "660": 4.90915,
+ "661": 4.91011,
+ "662": 4.90721,
+ "663": 4.90907,
+ "664": 4.91699,
+ "665": 4.91095,
+ "666": 4.90826,
+ "667": 4.90687,
+ "668": 4.90738,
+ "669": 5.25716,
+ "670": 5.25453,
+ "671": 5.28603,
+ "672": 5.25386,
+ "673": 6.29304,
+ "674": 4.91719,
+ "675": 4.9174,
+ "676": 4.92014,
+ "677": 4.92048,
+ "678": 4.90878,
+ "679": 4.90967,
+ "680": 4.90981,
+ "681": 4.91054,
+ "682": 4.90885,
+ "683": 4.90932,
+ "684": 4.915,
+ "685": 4.90701,
+ "686": 4.91124,
+ "687": 4.91733,
+ "688": 4.91577,
+ "689": 4.91189,
+ "690": 4.90854,
+ "691": 4.90631,
+ "692": 4.90689,
+ "693": 4.9142,
+ "694": 4.90933,
+ "695": 4.90064,
+ "696": 4.88962,
+ "697": 4.89317,
+ "698": 4.89665,
+ "699": 4.90473,
+ "700": 4.90675,
+ "701": 4.90072,
+ "702": 4.90347,
+ "703": 4.90535,
+ "704": 4.90243,
+ "705": 4.90653,
+ "706": 4.90494,
+ "707": 4.90715,
+ "708": 4.89971,
+ "709": 5.25068,
+ "710": 5.24447,
+ "711": 4.91173,
+ "712": 4.91607,
+ "713": 5.26011,
+ "714": 4.90966,
+ "715": 4.90512,
+ "716": 5.63181,
+ "717": 5.62011,
+ "718": 5.23301,
+ "719": 4.91317,
+ "720": 4.90779,
+ "721": 4.90675,
+ "722": 4.90612,
+ "723": 4.90554,
+ "724": 4.90952,
+ "725": 4.90669,
+ "726": 4.90589,
+ "727": 4.9062,
+ "728": 4.91028,
+ "729": 4.905,
+ "730": 4.90848,
+ "731": 4.90621,
+ "732": 4.91216,
+ "733": 4.90248,
+ "734": 4.90051,
+ "735": 4.90319,
+ "736": 4.90401,
+ "737": 4.90646,
+ "738": 4.90558,
+ "739": 4.90438,
+ "740": 4.90694,
+ "741": 4.9036,
+ "742": 4.90521,
+ "743": 4.90326,
+ "744": 4.90534,
+ "745": 4.90658,
+ "746": 5.24876,
+ "747": 4.91293,
+ "748": 5.24944,
+ "749": 4.90712,
+ "750": 4.90572,
+ "751": 4.90977,
+ "752": 4.90683,
+ "753": 4.90815,
+ "754": 4.90611,
+ "755": 4.91427,
+ "756": 4.9129,
+ "757": 4.91264,
+ "758": 5.25755,
+ "759": 4.91199,
+ "760": 5.2647,
+ "761": 4.91559,
+ "762": 5.64712,
+ "763": 5.59149,
+ "764": 4.91566,
+ "765": 4.91348,
+ "766": 4.92052,
+ "767": 4.9149,
+ "768": 4.91624,
+ "769": 4.90919,
+ "770": 4.9208,
+ "771": 4.9111,
+ "772": 4.91242,
+ "773": 4.91183,
+ "774": 4.91856,
+ "775": 4.91524,
+ "776": 4.91642,
+ "777": 4.91271,
+ "778": 4.91587,
+ "779": 4.91173,
+ "780": 4.9163,
+ "781": 4.9101,
+ "782": 4.90927,
+ "783": 4.91594,
+ "784": 5.27562,
+ "785": 5.29399,
+ "786": 4.92064,
+ "787": 4.92508,
+ "788": 4.91936,
+ "789": 4.92025,
+ "790": 4.92839,
+ "791": 4.91829,
+ "792": 4.9234,
+ "793": 4.92615,
+ "794": 4.91968,
+ "795": 4.91417,
+ "796": 4.89214,
+ "797": 4.87642,
+ "798": 4.87726,
+ "799": 4.88691,
+ "800": 4.87753,
+ "801": 4.90361,
+ "802": 4.91538,
+ "803": 5.25822,
+ "804": 5.25769,
+ "805": 4.90985,
+ "806": 4.91228,
+ "807": 5.6423,
+ "808": 5.23836,
+ "809": 4.9314,
+ "810": 4.91226,
+ "811": 4.91382,
+ "812": 4.91588,
+ "813": 4.91005,
+ "814": 4.9202,
+ "815": 4.90766,
+ "816": 4.90744,
+ "817": 4.91497,
+ "818": 4.91,
+ "819": 4.90572,
+ "820": 4.91342,
+ "821": 5.26215,
+ "822": 5.25971,
+ "823": 4.92486,
+ "824": 4.92645,
+ "825": 4.91518,
+ "826": 4.91893,
+ "827": 4.90862,
+ "828": 4.9143,
+ "829": 4.91422,
+ "830": 4.91829,
+ "831": 4.90569,
+ "832": 4.91122,
+ "833": 4.90584,
+ "834": 4.90518,
+ "835": 4.90755,
+ "836": 4.90656,
+ "837": 4.90626,
+ "838": 4.90987,
+ "839": 4.91189,
+ "840": 4.90735,
+ "841": 4.90697,
+ "842": 4.91064,
+ "843": 4.90409,
+ "844": 4.90711,
+ "845": 4.90385,
+ "846": 4.90599,
+ "847": 5.24636,
+ "848": 4.89752,
+ "849": 5.24655,
+ "850": 4.90148,
+ "851": 4.89501,
+ "852": 5.98483,
+ "853": 4.89468,
+ "854": 4.89653,
+ "855": 4.8954,
+ "856": 4.89811,
+ "857": 4.90026,
+ "858": 5.24069,
+ "859": 4.91345,
+ "860": 5.2538,
+ "861": 4.91107,
+ "862": 4.90905,
+ "863": 4.90289,
+ "864": 4.90179,
+ "865": 4.90697,
+ "866": 4.89969,
+ "867": 4.89622,
+ "868": 4.89817,
+ "869": 4.89734,
+ "870": 4.89421,
+ "871": 4.902,
+ "872": 4.89737,
+ "873": 4.90082,
+ "874": 4.8986,
+ "875": 4.9034,
+ "876": 4.90213,
+ "877": 4.89969,
+ "878": 4.90652,
+ "879": 4.90216,
+ "880": 4.90541,
+ "881": 4.90491,
+ "882": 4.89798,
+ "883": 4.89325,
+ "884": 4.89662,
+ "885": 4.91,
+ "886": 4.89481,
+ "887": 4.90025,
+ "888": 4.89887,
+ "889": 4.89458,
+ "890": 4.89351,
+ "891": 4.89343,
+ "892": 5.24625,
+ "893": 4.90075,
+ "894": 5.24719,
+ "895": 4.89439,
+ "896": 5.95508,
+ "897": 5.92842,
+ "898": 4.90126,
+ "899": 4.91443,
+ "900": 4.90222,
+ "901": 4.89928,
+ "902": 4.89952,
+ "903": 4.89905,
+ "904": 4.90536,
+ "905": 4.90627,
+ "906": 4.90188,
+ "907": 4.90671,
+ "908": 4.90531,
+ "909": 4.90614,
+ "910": 4.90319,
+ "911": 4.90668,
+ "912": 4.90614,
+ "913": 4.90641,
+ "914": 4.90219,
+ "915": 4.89858,
+ "916": 4.89788,
+ "917": 4.90114,
+ "918": 4.89062,
+ "919": 4.89675,
+ "920": 4.89412,
+ "921": 4.89851,
+ "922": 4.90258,
+ "923": 4.89837,
+ "924": 4.89168,
+ "925": 4.90558,
+ "926": 4.88926,
+ "927": 4.89631,
+ "928": 4.89481,
+ "929": 4.89896,
+ "930": 4.90349,
+ "931": 4.90254,
+ "932": 4.89424,
+ "933": 5.2393,
+ "934": 4.90447,
+ "935": 5.24957,
+ "936": 4.89799,
+ "937": 5.24757,
+ "938": 4.90497,
+ "939": 5.26023,
+ "940": 4.905,
+ "941": 4.90603,
+ "942": 5.89013,
+ "943": 5.2754,
+ "944": 4.89903,
+ "945": 4.90825,
+ "946": 4.90072,
+ "947": 4.91095,
+ "948": 4.89642,
+ "949": 4.90314,
+ "950": 4.9027,
+ "951": 4.90276,
+ "952": 4.90005,
+ "953": 4.90591,
+ "954": 4.89179,
+ "955": 4.89648,
+ "956": 4.89739,
+ "957": 4.90258,
+ "958": 4.90027,
+ "959": 4.90627,
+ "960": 4.89592,
+ "961": 4.89153,
+ "962": 4.89826,
+ "963": 4.89281,
+ "964": 4.88656,
+ "965": 4.9056,
+ "966": 4.88948,
+ "967": 4.89075,
+ "968": 4.89128,
+ "969": 4.88907,
+ "970": 5.23384,
+ "971": 4.91197,
+ "972": 5.24458,
+ "973": 4.90766,
+ "974": 4.90557,
+ "975": 4.9059,
+ "976": 4.90502,
+ "977": 4.90392,
+ "978": 4.90541,
+ "979": 4.89927,
+ "980": 4.9047,
+ "981": 4.90276,
+ "982": 5.2516,
+ "983": 5.25121,
+ "984": 4.90232,
+ "985": 4.90209,
+ "986": 5.26939,
+ "987": 5.52932,
+ "988": 5.28293,
+ "989": 4.91742,
+ "990": 4.90637,
+ "991": 4.90953,
+ "992": 4.90864,
+ "993": 4.9075,
+ "994": 4.90696,
+ "995": 4.90473,
+ "996": 4.90192,
+ "997": 4.90199,
+ "998": 4.89181,
+ "999": 4.89111,
+ "1000": 4.89025,
+ "1001": 4.9168,
+ "1002": 4.90983,
+ "1003": 4.91875,
+ "1004": 4.90892,
+ "1005": 4.92588,
+ "1006": 4.91678,
+ "1007": 5.262,
+ "1008": 4.92447,
+ "1009": 5.26729,
+ "1010": 4.92803,
+ "1011": 4.92461,
+ "1012": 4.92338,
+ "1013": 4.9218,
+ "1014": 4.92051,
+ "1015": 4.92442,
+ "1016": 4.91248,
+ "1017": 4.92113,
+ "1018": 4.92046,
+ "1019": 4.91949,
+ "1020": 4.92623,
+ "1021": 4.92267,
+ "1022": 4.92249,
+ "1023": 4.91899,
+ "1024": 4.92062,
+ "1025": 5.26804,
+ "1026": 4.92131,
+ "1027": 5.26954,
+ "1028": 4.91856,
+ "1029": 4.91681,
+ "1030": 5.90813,
+ "1031": 4.92456,
+ "1032": 4.92325,
+ "1033": 5.3083,
+ "1034": 4.91916,
+ "1035": 4.91422,
+ "1036": 4.91293,
+ "1037": 4.91223,
+ "1038": 4.9211,
+ "1039": 4.92393,
+ "1040": 4.92009,
+ "1041": 4.92106,
+ "1042": 4.9242,
+ "1043": 4.92005,
+ "1044": 5.26878,
+ "1045": 4.92668,
+ "1046": 4.93095,
+ "1047": 5.27312,
+ "1048": 4.92622,
+ "1049": 4.92229,
+ "1050": 4.92078,
+ "1051": 4.9252,
+ "1052": 4.92398,
+ "1053": 4.92467,
+ "1054": 4.92254,
+ "1055": 4.92721,
+ "1056": 4.92594,
+ "1057": 4.93074,
+ "1058": 4.9202,
+ "1059": 4.92339,
+ "1060": 4.92936,
+ "1061": 4.92316,
+ "1062": 4.91832,
+ "1063": 4.9324,
+ "1064": 4.96238,
+ "1065": 4.94321,
+ "1066": 4.96241,
+ "1067": 4.93128,
+ "1068": 4.92665,
+ "1069": 4.93217,
+ "1070": 5.29473,
+ "1071": 5.27044,
+ "1072": 4.91774,
+ "1073": 4.92979,
+ "1074": 5.30092,
+ "1075": 5.57166,
+ "1076": 4.9336,
+ "1077": 4.91975,
+ "1078": 5.29838,
+ "1079": 4.92345,
+ "1080": 4.92265,
+ "1081": 4.93832,
+ "1082": 5.28966,
+ "1083": 4.94183,
+ "1084": 5.28091,
+ "1085": 4.94506,
+ "1086": 4.94668,
+ "1087": 4.94028,
+ "1088": 4.93858,
+ "1089": 4.93937,
+ "1090": 4.9454,
+ "1091": 4.95599,
+ "1092": 4.95023,
+ "1093": 4.94499,
+ "1094": 4.96028,
+ "1095": 4.95213,
+ "1096": 4.96406,
+ "1097": 4.93905,
+ "1098": 4.92198,
+ "1099": 4.93824,
+ "1100": 4.92789,
+ "1101": 4.92981,
+ "1102": 4.93937,
+ "1103": 4.91985,
+ "1104": 4.91889,
+ "1105": 4.93785,
+ "1106": 4.94007,
+ "1107": 4.93618,
+ "1108": 4.94002,
+ "1109": 4.96964,
+ "1110": 4.93965,
+ "1111": 4.89692,
+ "1112": 4.89611,
+ "1113": 4.89245,
+ "1114": 5.24194,
+ "1115": 4.89604,
+ "1116": 5.23738,
+ "1117": 4.89591,
+ "1118": 4.89712,
+ "1119": 6.2207,
+ "1120": 4.89707,
+ "1121": 5.24025,
+ "1122": 4.89987,
+ "1123": 5.27914,
+ "1124": 4.9043,
+ "1125": 4.89477,
+ "1126": 4.89625,
+ "1127": 4.90132,
+ "1128": 4.90216,
+ "1129": 4.90398,
+ "1130": 4.89594,
+ "1131": 4.90153,
+ "1132": 4.89796,
+ "1133": 4.89536,
+ "1134": 4.89807,
+ "1135": 4.89858,
+ "1136": 4.89867,
+ "1137": 4.89681,
+ "1138": 4.92931,
+ "1139": 4.92599,
+ "1140": 4.89538,
+ "1141": 4.89732,
+ "1142": 4.89242,
+ "1143": 4.89262,
+ "1144": 4.89274,
+ "1145": 4.93085,
+ "1146": 4.9294,
+ "1147": 4.92891,
+ "1148": 4.91881,
+ "1149": 4.89129,
+ "1150": 4.89171,
+ "1151": 4.8862,
+ "1152": 4.89315,
+ "1153": 4.89463,
+ "1154": 4.89481,
+ "1155": 4.89194,
+ "1156": 5.23303,
+ "1157": 4.89025,
+ "1158": 4.89312,
+ "1159": 5.24533,
+ "1160": 5.25573,
+ "1161": 5.23949,
+ "1162": 4.8914,
+ "1163": 4.89247,
+ "1164": 4.8896,
+ "1165": 5.88618,
+ "1166": 4.91824,
+ "1167": 4.89232,
+ "1168": 5.27914,
+ "1169": 4.88638,
+ "1170": 4.89624,
+ "1171": 4.90097,
+ "1172": 4.89335,
+ "1173": 4.90022,
+ "1174": 4.88823,
+ "1175": 4.91533,
+ "1176": 4.91702,
+ "1177": 4.91026,
+ "1178": 4.89204,
+ "1179": 4.89341,
+ "1180": 4.88754,
+ "1181": 4.89101,
+ "1182": 4.89528,
+ "1183": 4.89482,
+ "1184": 4.88208,
+ "1185": 4.87829,
+ "1186": 4.88501,
+ "1187": 4.88593,
+ "1188": 4.87526,
+ "1189": 4.88604,
+ "1190": 4.90872,
+ "1191": 4.88218,
+ "1192": 4.8826,
+ "1193": 4.88606,
+ "1194": 5.22378,
+ "1195": 4.88192,
+ "1196": 4.8877,
+ "1197": 5.23842,
+ "1198": 4.89888,
+ "1199": 4.89039,
+ "1200": 4.89543,
+ "1201": 4.8917,
+ "1202": 4.88928,
+ "1203": 4.88428,
+ "1204": 4.91394,
+ "1205": 5.27535,
+ "1206": 5.27273,
+ "1207": 4.92919,
+ "1208": 4.92498,
+ "1209": 5.60645,
+ "1210": 5.23108,
+ "1211": 4.91823,
+ "1212": 4.91107,
+ "1213": 4.90706,
+ "1214": 5.33395,
+ "1215": 4.91341,
+ "1216": 4.92296,
+ "1217": 4.92797,
+ "1218": 4.91436,
+ "1219": 4.93183,
+ "1220": 4.92763,
+ "1221": 4.91189,
+ "1222": 4.91524,
+ "1223": 4.92927,
+ "1224": 4.90762,
+ "1225": 4.91646,
+ "1226": 4.95199,
+ "1227": 4.93657,
+ "1228": 4.91049,
+ "1229": 4.90576,
+ "1230": 4.92418,
+ "1231": 5.24788,
+ "1232": 4.90922,
+ "1233": 4.90828,
+ "1234": 5.28741,
+ "1235": 4.93359,
+ "1236": 4.92651,
+ "1237": 4.92759,
+ "1238": 4.91812,
+ "1239": 4.96161,
+ "1240": 4.92462,
+ "1241": 4.9408,
+ "1242": 4.95151,
+ "1243": 4.92866,
+ "1244": 4.94942,
+ "1245": 4.93202,
+ "1246": 4.93118,
+ "1247": 4.92787,
+ "1248": 4.93195,
+ "1249": 5.31148,
+ "1250": 4.96525,
+ "1251": 5.27677,
+ "1252": 4.95992,
+ "1253": 4.89092,
+ "1254": 5.87598,
+ "1255": 4.89013,
+ "1256": 4.89328,
+ "1257": 4.88679,
+ "1258": 4.89107,
+ "1259": 5.26785,
+ "1260": 4.89071,
+ "1261": 4.89005,
+ "1262": 4.89216,
+ "1263": 4.89212,
+ "1264": 4.88574,
+ "1265": 4.88902,
+ "1266": 4.88642,
+ "1267": 4.89574,
+ "1268": 4.88631,
+ "1269": 5.22724,
+ "1270": 4.88943,
+ "1271": 5.23761,
+ "1272": 4.90353,
+ "1273": 4.89726,
+ "1274": 4.92161,
+ "1275": 4.92347,
+ "1276": 4.91698,
+ "1277": 4.92233,
+ "1278": 4.91979,
+ "1279": 4.9211,
+ "1280": 4.9179,
+ "1281": 4.92209,
+ "1282": 4.94485,
+ "1283": 4.92932,
+ "1284": 4.92976,
+ "1285": 4.91788,
+ "1286": 4.93408,
+ "1287": 4.92359,
+ "1288": 4.92166,
+ "1289": 4.9185,
+ "1290": 4.91424,
+ "1291": 4.91891,
+ "1292": 4.92028,
+ "1293": 4.9117,
+ "1294": 5.27044,
+ "1295": 5.29676,
+ "1296": 4.91703,
+ "1297": 4.92056,
+ "1298": 4.92207,
+ "1299": 5.91394,
+ "1300": 4.9147,
+ "1301": 4.9131,
+ "1302": 4.9176,
+ "1303": 4.93425,
+ "1304": 5.304,
+ "1305": 4.91978,
+ "1306": 5.27498,
+ "1307": 4.92043,
+ "1308": 4.91675,
+ "1309": 5.27831,
+ "1310": 4.93667,
+ "1311": 4.93075,
+ "1312": 4.92766,
+ "1313": 4.92554,
+ "1314": 4.93753,
+ "1315": 4.93323,
+ "1316": 4.92326,
+ "1317": 4.92226,
+ "1318": 4.9254,
+ "1319": 4.91683,
+ "1320": 4.91352,
+ "1321": 4.93361,
+ "1322": 4.9202,
+ "1323": 4.92888,
+ "1324": 4.94749,
+ "1325": 4.92427,
+ "1326": 4.91993,
+ "1327": 4.94147,
+ "1328": 4.91569,
+ "1329": 4.9082,
+ "1330": 4.90808,
+ "1331": 4.92463,
+ "1332": 4.94304,
+ "1333": 4.91833,
+ "1334": 4.91915,
+ "1335": 4.9569,
+ "1336": 4.91253,
+ "1337": 4.91228,
+ "1338": 4.91599,
+ "1339": 5.26886,
+ "1340": 4.94108,
+ "1341": 5.28895,
+ "1342": 4.92166,
+ "1343": 4.93148,
+ "1344": 6.20454,
+ "1345": 4.93732,
+ "1346": 4.94109,
+ "1347": 5.28178,
+ "1348": 4.92597,
+ "1349": 5.31528,
+ "1350": 4.93124,
+ "1351": 4.9199,
+ "1352": 4.92145,
+ "1353": 4.91761,
+ "1354": 4.91599,
+ "1355": 4.91867,
+ "1356": 4.92286,
+ "1357": 4.91965,
+ "1358": 4.92454,
+ "1359": 4.92188,
+ "1360": 4.91921,
+ "1361": 4.92021,
+ "1362": 4.92372,
+ "1363": 4.91207,
+ "1364": 4.96107,
+ "1365": 4.91388,
+ "1366": 4.91683,
+ "1367": 4.91413,
+ "1368": 4.91691,
+ "1369": 4.91871,
+ "1370": 4.92278,
+ "1371": 4.92605,
+ "1372": 4.92653,
+ "1373": 4.9264,
+ "1374": 4.92864,
+ "1375": 4.92839,
+ "1376": 4.93185,
+ "1377": 4.92304,
+ "1378": 4.92916,
+ "1379": 4.92701,
+ "1380": 4.92797,
+ "1381": 5.27325,
+ "1382": 4.89544,
+ "1383": 4.89064,
+ "1384": 5.60494,
+ "1385": 5.00482,
+ "1386": 5.33879,
+ "1387": 4.92912,
+ "1388": 4.92575,
+ "1389": 5.83703,
+ "1390": 4.91691,
+ "1391": 4.91717,
+ "1392": 4.92005,
+ "1393": 4.92211,
+ "1394": 4.91895,
+ "1395": 5.29903,
+ "1396": 4.92143,
+ "1397": 4.91551,
+ "1398": 4.91427,
+ "1399": 4.91348,
+ "1400": 4.92556,
+ "1401": 4.92553,
+ "1402": 4.91884,
+ "1403": 4.91856,
+ "1404": 4.95579,
+ "1405": 4.88917,
+ "1406": 4.88886,
+ "1407": 4.90262,
+ "1408": 4.88379,
+ "1409": 4.88976,
+ "1410": 4.88681,
+ "1411": 4.8751,
+ "1412": 4.89308,
+ "1413": 4.89122,
+ "1414": 4.88458,
+ "1415": 4.89489,
+ "1416": 4.88438,
+ "1417": 4.88183,
+ "1418": 5.229,
+ "1419": 4.96736,
+ "1420": 4.95735,
+ "1421": 5.29839,
+ "1422": 4.92896,
+ "1423": 4.9679,
+ "1424": 4.96109,
+ "1425": 4.96048,
+ "1426": 4.95854,
+ "1427": 4.95558,
+ "1428": 4.90503,
+ "1429": 5.24486,
+ "1430": 5.24901,
+ "1431": 4.8987,
+ "1432": 4.89075,
+ "1433": 5.22736,
+ "1434": 5.47175,
+ "1435": 4.89209,
+ "1436": 4.8986,
+ "1437": 4.8891,
+ "1438": 4.88697,
+ "1439": 4.88974,
+ "1440": 5.27298,
+ "1441": 4.89403,
+ "1442": 4.90495,
+ "1443": 4.89585,
+ "1444": 4.89766,
+ "1445": 4.89344,
+ "1446": 4.89618,
+ "1447": 4.88721,
+ "1448": 4.88735,
+ "1449": 4.89401,
+ "1450": 4.89435,
+ "1451": 4.89143,
+ "1452": 4.88553,
+ "1453": 4.89139,
+ "1454": 4.89347,
+ "1455": 5.23147,
+ "1456": 4.8987,
+ "1457": 4.90447,
+ "1458": 4.89553,
+ "1459": 5.23187,
+ "1460": 4.90546,
+ "1461": 4.89293,
+ "1462": 4.89652,
+ "1463": 4.88806,
+ "1464": 4.94852,
+ "1465": 4.89339,
+ "1466": 4.88888,
+ "1467": 4.89409,
+ "1468": 4.89028,
+ "1469": 4.89198,
+ "1470": 4.89499,
+ "1471": 4.89853,
+ "1472": 4.89989,
+ "1473": 5.245,
+ "1474": 4.89244,
+ "1475": 5.24744,
+ "1476": 4.88786,
+ "1477": 4.88954,
+ "1478": 5.81074,
+ "1479": 4.90603,
+ "1480": 4.8817,
+ "1481": 4.88853,
+ "1482": 4.88913,
+ "1483": 4.88525,
+ "1484": 4.88091,
+ "1485": 5.26103,
+ "1486": 4.88332,
+ "1487": 4.88482,
+ "1488": 4.88349,
+ "1489": 4.93535,
+ "1490": 4.93713,
+ "1491": 4.94008,
+ "1492": 4.93273,
+ "1493": 5.26558,
+ "1494": 4.92625,
+ "1495": 4.93119,
+ "1496": 4.93326,
+ "1497": 5.29661,
+ "1498": 4.94651,
+ "1499": 4.94563,
+ "1500": 4.94732,
+ "1501": 4.94956,
+ "1502": 4.93949,
+ "1503": 4.94314,
+ "1504": 4.949,
+ "1505": 4.93848,
+ "1506": 4.93655,
+ "1507": 4.93352,
+ "1508": 4.93376,
+ "1509": 4.93575,
+ "1510": 4.93237,
+ "1511": 4.93325,
+ "1512": 4.93443,
+ "1513": 4.93608,
+ "1514": 4.92875,
+ "1515": 4.93822,
+ "1516": 4.92271,
+ "1517": 4.93602,
+ "1518": 4.93135,
+ "1519": 5.28269,
+ "1520": 5.28601,
+ "1521": 4.93214,
+ "1522": 4.93238,
+ "1523": 4.9331,
+ "1524": 5.84985,
+ "1525": 4.93183,
+ "1526": 4.9312,
+ "1527": 4.94067,
+ "1528": 4.94179,
+ "1529": 4.93283,
+ "1530": 5.64255,
+ "1531": 4.93012,
+ "1532": 4.93237,
+ "1533": 4.93188,
+ "1534": 5.28642,
+ "1535": 4.93295,
+ "1536": 4.93351,
+ "1537": 4.93687,
+ "1538": 4.93395,
+ "1539": 4.93892,
+ "1540": 4.93329,
+ "1541": 4.93178,
+ "1542": 4.94011,
+ "1543": 4.93223,
+ "1544": 4.9238,
+ "1545": 4.93295,
+ "1546": 4.92789,
+ "1547": 4.92723,
+ "1548": 4.93344,
+ "1549": 4.93081,
+ "1550": 4.93484,
+ "1551": 4.93247,
+ "1552": 4.94286,
+ "1553": 4.93871,
+ "1554": 4.9346,
+ "1555": 4.93508,
+ "1556": 4.93254,
+ "1557": 4.93621,
+ "1558": 4.93402,
+ "1559": 4.92552,
+ "1560": 4.92871,
+ "1561": 4.9342,
+ "1562": 4.93981,
+ "1563": 4.94231,
+ "1564": 5.28559,
+ "1565": 5.2926,
+ "1566": 4.93393,
+ "1567": 5.27554,
+ "1568": 5.55669,
+ "1569": 5.22897,
+ "1570": 4.93426,
+ "1571": 5.28382,
+ "1572": 4.94938,
+ "1573": 4.95055,
+ "1574": 4.94811,
+ "1575": 4.9489,
+ "1576": 5.33208,
+ "1577": 4.94524,
+ "1578": 4.94592,
+ "1579": 4.94832,
+ "1580": 4.94832,
+ "1581": 4.94408,
+ "1582": 4.93963,
+ "1583": 5.06791,
+ "1584": 4.93161,
+ "1585": 4.93335,
+ "1586": 4.93849,
+ "1587": 4.93237,
+ "1588": 4.93556,
+ "1589": 4.93066,
+ "1590": 4.94768,
+ "1591": 4.93099,
+ "1592": 4.93258,
+ "1593": 4.93981,
+ "1594": 4.92949,
+ "1595": 4.93453,
+ "1596": 4.92827,
+ "1597": 4.92584,
+ "1598": 4.93755,
+ "1599": 4.92974,
+ "1600": 4.94804,
+ "1601": 4.93191,
+ "1602": 4.93369,
+ "1603": 4.93286,
+ "1604": 4.93069,
+ "1605": 5.27051,
+ "1606": 4.92329,
+ "1607": 4.92495,
+ "1608": 5.27779,
+ "1609": 5.28346,
+ "1610": 5.29602,
+ "1611": 4.94123,
+ "1612": 4.93638,
+ "1613": 5.856,
+ "1614": 4.94437,
+ "1615": 4.93653,
+ "1616": 4.93875,
+ "1617": 4.93536,
+ "1618": 4.93896,
+ "1619": 4.93356,
+ "1620": 4.93572,
+ "1621": 5.31736,
+ "1622": 4.94531,
+ "1623": 4.94225,
+ "1624": 4.94386,
+ "1625": 4.93406,
+ "1626": 4.93798,
+ "1627": 4.93633,
+ "1628": 4.93917,
+ "1629": 4.93696,
+ "1630": 4.93053,
+ "1631": 4.92648,
+ "1632": 4.92658,
+ "1633": 4.93841,
+ "1634": 4.93342,
+ "1635": 4.9359,
+ "1636": 4.93181,
+ "1637": 4.93503,
+ "1638": 4.93642,
+ "1639": 4.93683,
+ "1640": 4.93436,
+ "1641": 4.9443,
+ "1642": 5.27794,
+ "1643": 4.94268,
+ "1644": 4.91864,
+ "1645": 4.92135,
+ "1646": 5.26653,
+ "1647": 4.93155,
+ "1648": 4.94793,
+ "1649": 4.92681,
+ "1650": 4.92909,
+ "1651": 4.92222,
+ "1652": 4.93308,
+ "1653": 5.27802,
+ "1654": 5.27831,
+ "1655": 4.92527,
+ "1656": 4.92184,
+ "1657": 4.92535,
+ "1658": 5.84478,
+ "1659": 4.93415,
+ "1660": 4.98533,
+ "1661": 4.95752,
+ "1662": 4.94766,
+ "1663": 4.94933,
+ "1664": 4.95355,
+ "1665": 4.94643,
+ "1666": 5.33217,
+ "1667": 4.93611,
+ "1668": 4.93532,
+ "1669": 4.9092,
+ "1670": 4.90894,
+ "1671": 4.9204,
+ "1672": 4.92236,
+ "1673": 4.9082,
+ "1674": 4.91286,
+ "1675": 4.90919,
+ "1676": 4.90864,
+ "1677": 4.91312,
+ "1678": 4.90871,
+ "1679": 4.92308,
+ "1680": 5.26267,
+ "1681": 4.92022,
+ "1682": 4.91096,
+ "1683": 4.91568,
+ "1684": 5.26065,
+ "1685": 4.90909,
+ "1686": 4.90718,
+ "1687": 4.91023,
+ "1688": 4.91504,
+ "1689": 4.9123,
+ "1690": 4.91353,
+ "1691": 4.90838,
+ "1692": 4.90311,
+ "1693": 4.90235,
+ "1694": 4.90376,
+ "1695": 4.90901,
+ "1696": 4.90724,
+ "1697": 4.91094,
+ "1698": 5.25776,
+ "1699": 4.91455,
+ "1700": 5.2613,
+ "1701": 4.90973,
+ "1702": 4.90149,
+ "1703": 5.82797,
+ "1704": 4.9102,
+ "1705": 4.91831,
+ "1706": 4.90187,
+ "1707": 4.89945,
+ "1708": 4.89865,
+ "1709": 4.89632,
+ "1710": 4.90065,
+ "1711": 5.28146,
+ "1712": 4.90271,
+ "1713": 4.90852,
+ "1714": 4.90365,
+ "1715": 4.90463,
+ "1716": 4.91059,
+ "1717": 5.24655,
+ "1718": 4.91868,
+ "1719": 4.90569,
+ "1720": 4.91426,
+ "1721": 4.91116,
+ "1722": 5.25454,
+ "1723": 4.91058,
+ "1724": 4.90906,
+ "1725": 4.92075,
+ "1726": 4.91839,
+ "1727": 4.91564,
+ "1728": 4.91131,
+ "1729": 4.91291,
+ "1730": 4.90884,
+ "1731": 4.91062,
+ "1732": 4.90638,
+ "1733": 4.9061,
+ "1734": 4.90658,
+ "1735": 4.91543,
+ "1736": 4.90614,
+ "1737": 4.91107,
+ "1738": 4.91084,
+ "1739": 4.90842,
+ "1740": 4.91418,
+ "1741": 4.90881,
+ "1742": 4.90792,
+ "1743": 5.26397,
+ "1744": 4.91738,
+ "1745": 5.25587,
+ "1746": 4.90599,
+ "1747": 4.90321,
+ "1748": 5.78796,
+ "1749": 4.90348,
+ "1750": 4.90858,
+ "1751": 4.89993,
+ "1752": 4.90938,
+ "1753": 4.90593,
+ "1754": 5.25406,
+ "1755": 4.9167,
+ "1756": 4.92732,
+ "1757": 5.32154,
+ "1758": 4.93234,
+ "1759": 5.25874,
+ "1760": 4.90683,
+ "1761": 4.90629,
+ "1762": 4.91525,
+ "1763": 4.91544,
+ "1764": 4.91062,
+ "1765": 4.90636,
+ "1766": 4.90873,
+ "1767": 4.91142,
+ "1768": 4.96573,
+ "1769": 4.90448,
+ "1770": 4.8891,
+ "1771": 4.8932,
+ "1772": 4.88066,
+ "1773": 4.87927,
+ "1774": 4.87496,
+ "1775": 4.90017,
+ "1776": 4.88861,
+ "1777": 4.88943,
+ "1778": 4.88632,
+ "1779": 4.89539,
+ "1780": 4.88673,
+ "1781": 4.89482,
+ "1782": 4.89261,
+ "1783": 4.88921,
+ "1784": 4.89935,
+ "1785": 4.88986,
+ "1786": 4.89061,
+ "1787": 4.88853,
+ "1788": 5.24035,
+ "1789": 5.24993,
+ "1790": 4.91207,
+ "1791": 4.91991,
+ "1792": 5.55415,
+ "1793": 5.49039,
+ "1794": 4.899,
+ "1795": 4.88922,
+ "1796": 5.25127,
+ "1797": 4.89889,
+ "1798": 4.90442,
+ "1799": 4.89627,
+ "1800": 4.89346,
+ "1801": 4.89082,
+ "1802": 5.2731,
+ "1803": 4.89886,
+ "1804": 4.87379,
+ "1805": 4.87577,
+ "1806": 4.88484,
+ "1807": 4.87576,
+ "1808": 4.86783,
+ "1809": 4.8917,
+ "1810": 4.87329,
+ "1811": 4.87182,
+ "1812": 4.8594,
+ "1813": 4.86213,
+ "1814": 4.86701,
+ "1815": 4.86025,
+ "1816": 4.86454,
+ "1817": 4.86162,
+ "1818": 4.85688,
+ "1819": 4.85907,
+ "1820": 4.85765,
+ "1821": 4.85878,
+ "1822": 4.86537,
+ "1823": 4.86101,
+ "1824": 4.86218,
+ "1825": 4.86082,
+ "1826": 4.85916,
+ "1827": 4.86304,
+ "1828": 4.86335,
+ "1829": 4.85846,
+ "1830": 5.21054,
+ "1831": 4.87227,
+ "1832": 5.20618,
+ "1833": 4.86815,
+ "1834": 5.55416,
+ "1835": 4.87798,
+ "1836": 4.89752,
+ "1837": 5.79486,
+ "1838": 4.90553,
+ "1839": 4.90533,
+ "1840": 4.89368,
+ "1841": 4.89475,
+ "1842": 4.89469,
+ "1843": 4.88557,
+ "1844": 4.89,
+ "1845": 4.88668,
+ "1846": 4.89537,
+ "1847": 5.26263,
+ "1848": 4.89245,
+ "1849": 4.89348,
+ "1850": 4.88835,
+ "1851": 4.90708,
+ "1852": 4.90228,
+ "1853": 4.86785,
+ "1854": 4.87736,
+ "1855": 4.87369,
+ "1856": 4.87811,
+ "1857": 4.90299,
+ "1858": 4.88442,
+ "1859": 4.87297,
+ "1860": 4.89531,
+ "1861": 4.90241,
+ "1862": 4.89309,
+ "1863": 4.89512,
+ "1864": 4.90549,
+ "1865": 4.90854,
+ "1866": 4.9047,
+ "1867": 5.2401,
+ "1868": 4.89946,
+ "1869": 4.90883,
+ "1870": 4.90522,
+ "1871": 4.93888,
+ "1872": 5.21372,
+ "1873": 4.87709,
+ "1874": 4.86464,
+ "1875": 4.87233,
+ "1876": 4.88054,
+ "1877": 4.84923,
+ "1878": 5.17207,
+ "1879": 5.1976,
+ "1880": 4.8445,
+ "1881": 4.84388,
+ "1882": 4.84797,
+ "1883": 5.73664,
+ "1884": 4.84672,
+ "1885": 4.84557,
+ "1886": 4.85201,
+ "1887": 4.85018,
+ "1888": 4.84932,
+ "1889": 4.85617,
+ "1890": 4.84416,
+ "1891": 4.85089,
+ "1892": 4.84881,
+ "1893": 5.22668,
+ "1894": 4.8491,
+ "1895": 4.84681,
+ "1896": 4.84529,
+ "1897": 4.84998,
+ "1898": 4.8507,
+ "1899": 4.84271,
+ "1900": 4.84844,
+ "1901": 4.84365,
+ "1902": 4.83991,
+ "1903": 4.84228,
+ "1904": 5.17846,
+ "1905": 4.84978,
+ "1906": 4.84285,
+ "1907": 4.85138,
+ "1908": 4.84338,
+ "1909": 5.19721,
+ "1910": 4.85138,
+ "1911": 4.84739,
+ "1912": 4.84478,
+ "1913": 4.85226,
+ "1914": 4.85002,
+ "1915": 4.85039,
+ "1916": 4.85444,
+ "1917": 4.84588,
+ "1918": 4.8495,
+ "1919": 4.85217,
+ "1920": 4.84949,
+ "1921": 4.84631,
+ "1922": 4.84476,
+ "1923": 5.17493,
+ "1924": 5.19107,
+ "1925": 4.85154,
+ "1926": 4.84261,
+ "1927": 5.44494,
+ "1928": 5.14044,
+ "1929": 4.84927,
+ "1930": 4.84493,
+ "1931": 4.84048,
+ "1932": 4.84204,
+ "1933": 4.84664,
+ "1934": 4.84105,
+ "1935": 4.83981,
+ "1936": 4.841,
+ "1937": 4.84038,
+ "1938": 5.22894,
+ "1939": 4.84209,
+ "1940": 4.84356,
+ "1941": 5.20657,
+ "1942": 4.9004,
+ "1943": 4.90813,
+ "1944": 4.90655,
+ "1945": 4.88214,
+ "1946": 5.21239,
+ "1947": 4.86529,
+ "1948": 4.85849,
+ "1949": 4.85084,
+ "1950": 4.86533,
+ "1951": 4.86,
+ "1952": 4.85847,
+ "1953": 4.86113,
+ "1954": 4.85194,
+ "1955": 4.85611,
+ "1956": 4.87124,
+ "1957": 4.8777,
+ "1958": 4.84686,
+ "1959": 4.84732,
+ "1960": 4.86364,
+ "1961": 4.8509,
+ "1962": 4.8663,
+ "1963": 4.87064,
+ "1964": 4.86099,
+ "1965": 4.86103,
+ "1966": 4.84569,
+ "1967": 5.17792,
+ "1968": 4.84796,
+ "1969": 5.20648,
+ "1970": 4.84901,
+ "1971": 4.84838,
+ "1972": 5.74018,
+ "1973": 4.85813,
+ "1974": 4.85367,
+ "1975": 4.86684,
+ "1976": 4.87041,
+ "1977": 4.90603,
+ "1978": 4.90475,
+ "1979": 5.25145,
+ "1980": 4.94444,
+ "1981": 4.92124,
+ "1982": 4.90832,
+ "1983": 4.94722,
+ "1984": 5.67636,
+ "1985": 4.939,
+ "1986": 4.93543,
+ "1987": 4.96136,
+ "1988": 4.92447,
+ "1989": 4.87603,
+ "1990": 4.86128,
+ "1991": 4.86822,
+ "1992": 4.86666,
+ "1993": 4.85995,
+ "1994": 4.86025,
+ "1995": 4.85738,
+ "1996": 4.86953,
+ "1997": 4.86535,
+ "1998": 4.86591,
+ "1999": 4.86231,
+ "2000": 4.86466
+ }
+ }
+}
\ No newline at end of file
diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/golden_values_dev_dgx_h100.json
index 9be8a9dc0ca..dc1e1921fd8 100644
--- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/golden_values_dev_dgx_h100.json
+++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/golden_values_dev_dgx_h100.json
@@ -1,1028 +1,1028 @@
{
"throughput": [
- 94.6087716527102,
- 115.85992244026639,
- 138.9562527069375,
- 133.18726531918395,
- 81.97861561771212,
- 134.30726469422635,
- 86.456140428456,
- 114.99456351298251,
- 147.3101800153954,
- 3.0364623744653003,
- 124.7590786954667,
- 134.2276982994434,
- 3.0580463134110167,
- 117.03969654341354,
- 130.92134521286803,
- 48.493091604204935,
- 1.4498729599486508,
- 128.01470907994928,
- 1.8330770354872434,
- 66.31842482241125,
- 82.24189975425459,
- 1.07058112939944,
- 1.8815468970982412,
- 0.9373246942729808,
- 134.9963160815443,
- 2.285771114682068,
- 43.068220270070434,
- 134.9677086822377,
- 82.44946740133796,
- 47.71839155542011,
- 114.4199568886962,
- 29.67621576315833,
- 144.1589742491705,
- 95.8164720809401,
- 122.80562228460093,
- 39.21436814433054,
- 3.041180292262413,
- 3.2867844729646842,
- 72.43808226229888,
- 0.8371525937296347,
- 1.2212635079980698,
- 145.6869075644325,
- 42.317711349146016,
- 109.1196064871946,
- 73.6281770453198,
- 140.4495689387567,
- 1.219834296561022,
- 138.66856497329005,
- 23.33818821323391,
- 67.82342558671365,
- 130.09683254313987,
- 147.60199288178146,
- 0.9427431720755464,
- 3.2856495013162523,
- 79.12426666101076,
- 86.41557345094756,
- 120.17346279825053,
- 137.16615251640926,
- 108.93291864542198,
- 110.10504114490513,
- 46.19253755421628,
- 0.950218846923012,
- 136.50642826951463,
- 142.73168666846448,
- 1.2206786818073785,
- 1.898581377105612,
- 131.72636154091063,
- 2.2842414327001976,
- 89.76521170090028,
- 114.66053545744656,
- 58.64474290044525,
- 0.8367865961030284,
- 128.01767795820945,
- 60.87292097103301,
- 124.20016865241587,
- 119.59336898055426,
- 0.9425820346281929,
- 93.70053305431952,
- 1.0728113870213674,
- 135.7596767309971,
- 112.89357243644062,
- 89.2743296587299,
- 137.86411291342458,
- 135.6974706051771,
- 102.59633828443238,
- 129.82058179399326,
- 139.57672703148444,
- 140.5642311163746,
- 78.49182953675201,
- 123.40912657074227,
- 82.74099904578694,
- 75.5490641626476,
- 93.38596238341951,
- 141.19058076067225,
- 1.072254167577298,
- 100.8669047802279,
- 132.77382347347034,
- 92.29086179175866,
- 137.20301032384705,
- 89.57723938765776,
- 67.5465256589703,
- 0.9498935124108836,
- 1.0716887464650027,
- 0.8365472180547067,
- 137.902625307774,
- 132.67132600219722,
- 1.45201860416265,
- 1.8366476879619427,
- 88.65095604379363,
- 132.1806036761347,
- 126.0481874394642,
- 127.43750324083169,
- 93.27238135265156,
- 109.83884164204308,
- 102.30516355984702,
- 141.10387096377744,
- 0.9425154448032942,
- 95.04281981148903,
- 103.11525529548061,
- 0.8361762901534399,
- 135.3171561172067,
- 123.30032998064965,
- 118.75691144485415,
- 82.21375599642211,
- 66.37216333263251,
- 120.02349229491865,
- 27.339414655466246,
- 133.1312422227687,
- 123.02377779863252,
- 111.0798894329,
- 58.88405247768833,
- 131.31767475108893,
- 40.19076958615912,
- 123.58362152151858,
- 130.6541142941889,
- 61.39555613504246,
- 43.92154495664044,
- 1.037012527495492,
- 127.16052127606021,
- 137.06554800183082,
- 85.67161160523041,
- 1.0253417447981334,
- 139.20903624514017,
- 140.19068787455728,
- 117.67416498245059,
- 23.410837515725987,
- 130.73052473972666,
- 22.561824695346466,
- 1.028901717647808,
- 119.30712483977753,
- 117.77548263464804,
- 135.2959098119142,
- 142.10193821260228,
- 1.0366044325624144,
- 1.0350271698893887,
- 132.8943567509843,
- 51.50353963446039,
- 113.39559408843714,
- 124.25424103796537,
- 129.60407993083075,
- 136.8566687186031,
- 1.036163010240988,
- 1.0345739017743927,
- 118.72350056844492,
- 32.453707095990595,
- 43.851925176925825,
- 139.39206855448938,
- 141.0979597861742,
- 132.81461728578432,
- 80.95956255477945,
- 133.42483643501154,
- 57.27721135575491,
- 81.47649794801364,
- 79.39765285063396,
- 56.40255861789973,
- 0.8890603607397893,
- 137.59325887086797,
- 118.03982850100024,
- 53.04390121587005,
- 88.31177924841927,
- 1.0287550608831881,
- 54.67393025836421,
- 54.73556135447348,
- 129.6143036059356,
- 123.57095756116274,
- 146.05184555314386,
- 55.506024155977386,
- 84.40666358740559,
- 62.68531518105107,
- 147.42894642823578,
- 1.0274253590993496,
- 145.9063526676371,
- 76.36231256557768,
- 1.035808949157935,
- 136.1858098182613,
- 93.13144140533397,
- 54.57886608953819,
- 1.0251956490815057,
- 1.0270063804838983,
- 67.96952180390161,
- 136.90103479290272,
- 78.62986077133174,
- 129.97235998681177,
- 70.57784076609056,
- 1.028567312218149,
- 69.64434330087829,
- 1.0266016363366386,
- 25.142311727265525,
- 139.54750333578679,
- 118.80547132463877,
- 1.0342055876192149,
- 132.79991800938092,
- 88.25494664060619,
- 132.4600307114398,
- 1.026200775415348,
- 111.33264788932784,
- 1.031301270403004,
- 104.45912302410692,
- 1.0337771723701492,
- 124.53550504281608,
- 1.0283501183885058,
- 126.53361938982871,
- 139.83512785200963,
- 102.28350299734186,
- 122.68389734539087,
- 139.27095111763788,
- 1.0333552237490158,
- 97.04945381465573,
- 60.63422077140298,
- 1.0248694052483192,
- 96.77644543721476,
- 118.38370846079931,
- 1.0309087229819596,
- 136.0487423665781,
- 1.032932214377732,
- 104.96525711514936,
- 50.75370028394122,
- 125.67617176346853,
- 125.47392048276225,
- 101.59371483024698,
- 119.1183231384482,
- 134.24568445137294,
- 1.0323996653747745,
- 119.28563313083153,
- 50.183581144589674,
- 107.50817556608582,
- 127.4693561344537,
- 116.0234844098742,
- 149.0429439759437,
- 127.77855747904051,
- 1.0319900690130652,
- 129.7400124946839,
- 60.27584011696136,
- 1.0245534026749026,
- 113.8687773549026,
- 129.9927880985222,
- 41.55332067297356,
- 12.991853549713621,
- 144.9384518471586,
- 127.77570879015505,
- 79.09214991388126,
- 1.0326234729165304,
- 144.50618896622706,
- 44.461452482592826,
- 145.75357879817352,
- 150.5618330832813,
- 123.17802281879979,
- 147.0133924731902,
- 57.07203337285457,
- 140.17944630269687,
- 44.5066568841284,
- 150.2834791394652,
- 146.37106237628518,
- 135.59553639884948,
- 21.91845075979551,
- 1.0391172002596458,
- 92.42182316100705,
- 14.98578222593142,
- 19.944740287073653,
- 32.75622847272977,
- 58.94666795839769,
- 1.0428676908165904,
- 97.94938911630567,
- 140.5399781540016,
- 36.397689902912774,
- 1.0322919875583962,
- 33.76444948259586,
- 147.54902815924785,
- 51.316830076622495,
- 153.55703202636914,
- 46.423895018386204,
- 140.271682540213,
- 1.0340651759548871,
- 85.22971449383292,
- 141.80480996358014,
- 1.0234621691055457,
- 1.0355322329825165,
- 136.96321865236195,
- 138.2293990177049,
- 136.89440582973347,
- 96.94919171687799,
- 54.992986423891566,
- 142.91167590864902,
- 138.73615931624403,
- 86.32837448704223,
- 1.0424247604140402,
- 127.58052889290863,
- 138.2472241943501,
- 1.0338260095695477,
- 1.0317372756221133,
- 150.59249576769173,
- 1.0229533138894364,
- 149.1711141084735,
- 1.0419379125129562,
- 1.040305113121658,
- 150.13261057757276,
- 62.47975017460808,
- 70.20443057037575,
- 76.88821624674898,
- 1.0225242667788867,
- 136.83301633777177,
- 1.0414381555227956,
- 131.6044067829552,
- 1.038902005769604,
- 1.0335832618537684,
- 83.38230404797935,
- 3.047737981863063,
- 140.9843162162637,
- 1.0352264324041114,
- 1.0409374510445146,
- 103.17228299164871,
- 1.0383219913492376,
- 67.5151836065632,
- 126.94018489907108,
- 95.29974174831813,
- 1.022161551972834,
- 1.0348032799350415,
- 93.24855217625235,
- 140.00831851627856,
- 142.46553219867087,
- 80.52507876480331,
- 149.47939431741142,
- 125.60095189608528,
- 92.57991472689042,
- 153.09192667088175,
- 98.78787611117323,
- 136.9802701171813,
- 1.0378200246498124,
- 79.05370338483348,
- 145.63143231877774,
- 107.86253722014555,
- 113.1390555766259,
- 150.4596904971142,
- 6.010262757833046,
- 138.11675690694213,
- 1.0371929842524894,
- 55.1702723554103,
- 148.4142582794926,
- 108.62464742566522,
- 142.2515578682958,
- 149.5588988951372,
- 1.0310870179234204,
- 32.798276334675066,
- 145.8363475163408,
- 82.52497836005318,
- 144.77105210255448,
- 140.95035733017403,
- 145.4844811663436,
- 145.0646083055648,
- 139.1641494303434,
- 1.0401220454548914,
- 146.10598185112948,
- 1.0335329080843159,
- 1.0316085392161136,
- 133.98012837767038,
- 129.62059667226987,
- 151.2681266565858,
- 1.030719335336581,
- 135.9600336007384,
- 1.0366589924031362,
- 107.70864165999221,
- 118.06361914834272,
- 148.4615541738592,
- 135.1206190516379,
- 1.0788915925864082,
- 1.0662361391973343,
- 1.0784094142292293,
- 145.5492563111853,
- 100.1745158858024,
- 89.97448812790176,
- 140.13008352060388,
- 8.378443606045758,
- 19.841723966559687,
- 31.11972559764219,
- 127.75589035167928,
- 144.649118240912,
- 83.40454687650907,
- 13.609558087727212,
- 144.14916775068022,
- 143.0831699051951,
- 144.53789580070173,
- 129.35689525213576,
- 126.54760361436873,
- 136.72725454688293,
- 83.66753329456253,
- 35.238850690537326,
- 138.73588075606074,
- 148.39285997484404,
- 141.43706957675556,
- 35.20788617289704,
- 140.22918428708584,
- 141.42288954532623,
- 80.8071906111917,
- 53.480908541665116,
- 96.60869116876205,
- 138.83030943256392,
- 146.89537016655746,
- 1.0659353965573166,
- 138.66041009897964,
- 138.0783824554628,
- 54.95061283513892,
- 1.0688789370964418,
- 145.4981195236156,
- 107.91672388693667,
- 147.39387423946786,
- 143.49840246862203,
- 1.0781871694837721,
- 125.37215873599833,
- 46.390553110182545,
- 1.0683430650310588,
- 60.55314896188811,
- 128.32962060837178,
- 142.6648214311374,
- 1.065532502621677,
- 145.06202945295232,
- 149.5985088362253,
- 43.61426254132819,
- 139.2120402464869,
- 138.80120892663803,
- 142.59390751862693,
- 147.27000174003754,
- 139.5980537408405,
- 142.37081759892675,
- 76.47257166426981,
- 0.8663971721944621,
- 1.067847671923619,
- 1.0752972325757186,
- 139.11225337731244,
- 154.1012640338781,
- 91.85315813315137,
- 7.34066705730821,
- 1.0763437477764217,
- 56.03391448680589,
- 1.067309924884827,
- 1.0747789028833068,
- 1.057667310022394,
- 146.4284745539176,
- 142.32867288307636,
- 132.81801172672715,
- 142.5746724111237,
- 43.178263922620026,
- 140.19958418325498,
- 1.0742201855279276,
- 139.95237701874325,
- 124.69044225989671,
- 89.93275546978569,
- 1.0778110524743836,
- 108.03753008375865,
- 0.8649825661375887,
- 101.22782607000799,
- 138.6615942910557,
- 1.0572642952018412,
- 143.509260845593,
- 1.0651693329533294,
- 97.454990956795,
- 1.075960473594851,
- 104.89429761368234,
- 153.46849816095335,
- 143.28204379991922,
- 112.57923589922926,
- 145.35468060283986,
- 119.53338040876814,
- 132.53105489182144,
- 146.60735281445733,
- 0.8648000721123511,
- 132.61504628627392,
- 140.81953388748138,
- 1.05684091289561,
- 147.29646966899597,
- 1.0646855258714663,
- 1.0772400203863821,
- 137.87592499226204,
- 101.79954304062817,
- 134.45893707567646,
- 1.0737967838723397,
- 147.3289039421509,
- 142.95955673278567,
- 123.11846557585149,
- 139.7223884224781,
- 5.274894457437767,
- 0.8646226703470901,
- 135.27010135142623,
- 134.53222451904563,
- 140.4520894166607,
- 148.6784682726068,
- 148.83999547746723,
- 144.76059628877204,
- 146.09818079047014,
- 0.8644123666240657,
- 133.05795012757028,
- 141.21253159110282,
- 147.08086640702987,
- 153.13511211461227,
- 147.72437078211334,
- 53.87242850230838,
- 61.34701685378028,
- 74.50771860339175,
- 16.40780504974564,
- 16.448796993269678,
- 144.08505364828036,
- 143.78069847853888,
- 145.08382905436133,
- 139.4144567792124,
- 1.113422304912727,
- 23.732299099149245,
- 146.716938504402,
- 1.1150428401994323,
- 1.1070863332993708,
- 147.462815334713,
- 15.300506166735937,
- 142.89311901203018,
- 35.881455163220174,
- 0.8959120615185874,
- 134.50389621984408,
- 79.91603718165896,
- 145.31776951960734,
- 153.19384567886857,
- 142.494036234602,
- 130.58249312188119,
- 1.1128817603274543,
- 56.157995916719756,
- 35.81413980204931,
- 116.5213087641768,
- 63.30354399512571,
- 55.0117106848875,
- 47.52954249314361,
- 153.04709230401787,
- 1.112276523473745,
- 80.1523559974256,
- 136.20373724941714,
- 1.114673225365626,
- 1.1067132158651183,
- 149.29883052073288,
- 145.10950784560325,
- 130.53765167080937,
- 1.111788125890117,
- 0.8957719496064405,
- 1.1050775451489783,
- 17.522300994030367,
- 154.45472111064055,
- 152.07616582090188,
- 1.1020107149905272,
- 138.6808068419634,
- 76.87873177159636,
- 51.43702839643221,
- 138.95045176064437,
- 138.64177504011988,
- 140.72197385602811,
- 132.80947742972836,
- 149.78872816785005,
- 139.94034036065392,
- 154.2632802491591,
- 55.57148538150843,
- 1.1044580058296936,
- 147.1712801496827,
- 77.84198065949245,
- 142.38330204183904,
- 151.76812011990265,
- 145.19131540821485,
- 147.26566215388425,
- 87.12413393605841,
- 1.1038403429439656,
- 141.4935550752979,
- 145.7397470598185,
- 3.3080164659931235,
- 123.0327553358976,
- 146.24080278853327,
- 148.10448175245884,
- 29.234562433775857,
- 151.30177873039895,
- 135.4653748135468,
- 144.3293913931314,
- 148.16163203136404,
- 1.1015876034201657,
- 1.1114790318458536,
- 136.68047783885697,
- 77.72584511329579,
- 125.73692105352463,
- 106.98755729483561,
- 96.25926845246491,
- 1.109721323323522,
- 141.71073652156545,
- 130.22006710827588,
- 145.24478945746003,
- 80.67459353439743,
- 1.1033551544760267,
- 150.03177939272493,
- 154.12875534463626,
- 150.04771421074818,
- 1.1010813815407388,
- 1.1110434127990452,
- 145.385699877379,
- 86.86487551811825,
- 130.16687493633253,
- 143.8726181331947,
- 111.91340621077623,
- 146.0394914387852,
- 1.1006353022455784,
- 134.47903589563677,
- 148.6907436994389,
- 102.87151097507036,
- 137.41724911494663,
- 1.1146766644704549,
- 143.85952373403495,
- 146.92280951248307,
- 1.100156488603178,
- 144.04783334738536,
- 148.53630346113712,
- 58.74848466983248,
- 147.0485685726298,
- 141.32891699761203,
- 142.8441702922343,
- 131.04366253726744,
- 128.6305301075303,
- 1.1106412111686195,
- 147.90025888582002,
- 0.8959265584913588,
- 149.5194069726666,
- 137.43649451567626,
- 1.1068068376551545,
- 68.05269425995475,
- 138.94056631255367,
- 138.43818227469507,
- 69.60391199895408,
- 114.83395091462887,
- 151.34107787433956,
- 141.57237630997332,
- 146.07433910500515,
- 9.941778754980154,
- 131.297822968639,
- 10.386636719874664,
- 10.545636067043365,
- 114.58677137445733,
- 75.28902943071078,
- 90.63452059810655,
- 143.58694736923238,
- 9.901118804514459,
- 144.5206530902411,
- 144.78737732574044,
- 79.81136215142409,
- 84.9314508821071,
- 120.18939827456474,
- 10.225253542151219,
- 9.702822548173124,
- 103.1188517219872,
- 138.5008491242522,
- 92.02238700298246,
- 151.99592340131602,
- 9.807595290716304,
- 150.0447954775559,
- 134.2614008494909,
- 149.38544573345007,
- 149.62298116309924,
- 124.32358754465251,
- 132.817456221544,
- 10.50607995390264,
- 9.78317681034783,
- 151.07916494121415,
- 146.93545537009487,
- 118.45851163082196,
- 145.03008316360754,
- 154.4449202186591,
- 146.86002069809945,
- 150.6932855951215,
- 110.74803327496042,
- 127.40788523389726,
- 150.81323854197058,
- 150.0047673310006,
- 149.6063654551971,
- 133.87244996538675,
- 10.329695475492791,
- 9.414695716712222,
- 106.77032789813472,
- 118.34636653947105,
- 123.44441062862572,
- 144.9015592115516,
- 153.74652990582067,
- 10.065713405335144,
- 129.38998560194165,
- 117.69087049838025,
- 99.15650839997046,
- 127.90462338199198,
- 147.3574863739125,
- 9.696544883885949,
- 9.8853852911422,
- 128.35872796896587,
- 145.2939860705264,
- 128.72081963712404,
- 94.09935653689803,
- 142.8780531031409,
- 130.5213122981276,
- 126.89288883528536,
- 153.36107852781166,
- 149.17239657923582,
- 9.177632630803961,
- 9.387171298727486,
- 109.68196882316985,
- 148.55536204011432,
- 152.61730207818772,
- 9.648922236946333,
- 132.805446535875,
- 138.74295200738652,
- 141.66118217831166,
- 124.0399127789103,
- 113.05005278683446,
- 149.71230902297984,
- 25.727698431920004,
- 129.56419655827216,
- 130.40687823665095,
- 128.46470366050013,
- 150.46298369674685,
- 9.22073843893938,
- 110.36443029340542,
- 148.23878821929193,
- 10.219508495480236,
- 9.615051521185155,
- 9.8723813087942,
- 149.91378148843256,
- 9.149056684599877,
- 130.37704092008303,
- 114.86611671621016,
- 134.53633480709703,
- 131.11593468604048,
- 149.74665952988033,
- 136.60701891253495,
- 146.50864617645632,
- 9.094221140419737,
- 149.69902295915708,
- 126.93245475406366,
- 141.2463933703881,
- 10.18172163650932,
- 136.76582155059438,
- 155.5823388453975,
- 144.68082947663285,
- 142.0128061769988,
- 116.20800508912414,
- 101.13756407758095,
- 10.050927550768915,
- 10.14139856150474,
- 9.573219645146107,
- 146.33874064646594,
- 137.22302119976462,
- 132.14965518046,
- 148.08190796641483,
- 117.6843964457568,
- 153.04352772565807,
- 146.79238076404926,
- 9.522740968586977,
- 145.93484469600287,
- 13.925952420322696,
- 12.697420287309185,
- 146.39122941822845,
- 113.94298610788566,
- 13.844109957456581,
- 154.57922917096633,
- 13.525210269101805,
- 103.83976095796662,
- 97.75660804271413,
- 135.83818209343426,
- 158.60060111529293,
- 111.57793188874757,
- 13.768524263105455,
- 154.2203592546867,
- 108.85242762118563,
- 111.15752259030245,
- 149.5942138872604,
- 119.77102605185765,
- 120.68065341205389,
- 105.29698904913548,
- 151.41465167808087,
- 138.90606724001483,
- 13.437371194424983,
- 119.97194649055415,
- 144.6223725248399,
- 146.9934910169238,
- 149.45319992777343,
- 121.48260402443249,
- 13.662736071688842,
- 14.448955892498802,
- 144.5545360346381,
- 154.00382983055897,
- 151.8635735223181,
- 137.2321484611102,
- 119.71487519948164,
- 88.24978714231261,
- 147.74815341218743,
- 142.1113258863455,
- 132.08775922189477,
- 124.63351274554526,
- 145.72256212355262,
- 100.50708502243579,
- 139.16363846809003,
- 114.82662827063822,
- 154.78307253831395,
- 149.22879563842886,
- 152.6744734255461,
- 145.81022434241217,
- 152.68018782123758,
- 116.75549006136289,
- 12.968595875688791,
- 6.824624970615158,
- 125.05116103474757,
- 147.66072487793718,
- 147.5735120742967,
- 139.1302141298083,
- 146.48542990069834,
- 12.674865288395944,
- 147.88858853602966,
- 6.8124480142416175,
- 137.54766974463703,
- 130.89979405333307,
- 13.364169845161861,
- 14.116086127002273,
- 130.3002929300388,
- 116.98398239487472,
- 152.70827610346095,
- 98.51470626500011,
- 135.1252373635164,
- 14.405992358855888,
- 154.13709739001223,
- 146.28661687368685,
- 137.87827066214206,
- 12.621081453489012,
- 154.04574874294514,
- 6.802625211185703,
- 152.18661864386252,
- 149.30257880598677,
- 13.244501725269068,
- 138.34068638798834,
- 150.95140747506372,
- 141.8441899037163,
- 152.99022366652198,
- 103.95004802425926,
- 140.28144756248412,
- 154.51222806007945,
- 85.40777548962518,
- 154.7067128296305,
- 120.47843952303268,
- 12.568053995018431,
- 12.916583075889136,
- 105.92477484543576,
- 137.92878859711615,
- 135.13853669037294,
- 137.88549737290148,
- 157.83019925734393,
- 145.48927689323145,
- 12.509532718065461,
- 150.6233829715981,
- 119.23669844460764,
- 138.49099023171033,
- 154.0870149904812,
- 140.1862744667834,
- 148.860174031694,
- 147.54629689336036,
- 12.448861769003683,
- 152.4711466483636,
- 102.47079224461186,
- 152.40864885890767,
- 156.21773232766026,
- 13.139291580904986,
- 150.30653960489693,
- 145.43571147072188,
- 132.8965387342577,
- 144.85972103961666,
- 125.5438694385711,
- 158.07457773478276,
- 14.359506122440205,
- 137.7658155977229,
- 153.68125116011197,
- 156.57780724945528,
- 12.394708947912125,
- 12.874702780202174,
- 110.61518572692995,
- 149.4338565730422,
- 149.67552030435513,
- 146.20909415912828,
- 9.308833539527914,
- 26.176147260970783,
- 8.701217384742513,
- 66.92241449340185,
- 105.12940849136734,
- 145.25326276553395,
- 139.68219350261262,
- 131.60335890332783,
- 150.53420884400245,
- 17.552483447968918,
- 99.60476667168517,
- 9.003208512207522,
- 8.539560747895454,
- 9.946172723540226,
- 150.55644446784382,
- 9.608936841972842,
- 104.80864366760326,
- 25.95068644438624,
- 99.42592550150236,
- 108.35979254469888,
- 113.9171427720856,
- 9.905905876631499,
- 131.1684982861573,
- 154.7989292174601,
- 151.34753888952145,
- 150.11816141981262,
- 143.00557828542912,
- 126.2310299151925,
- 113.53830001728545,
- 148.13405630794878,
- 150.7564429392251,
- 155.252325076404,
- 18.20048176554747,
- 25.725436761645142,
- 8.678711562613207,
- 143.3683328827327,
- 127.0294451168928,
- 137.50119476282134,
- 10.068367539846923,
- 155.64822784014916,
- 153.2789382926615,
- 25.46950813818654,
- 142.9138107220956,
- 155.10510899417167,
- 107.40557834412083,
- 9.871948602847068,
- 144.4712732194919,
- 140.17802930301565,
- 9.286026243902361,
- 129.1488895575147,
- 124.35586045151207,
- 140.1410811550992,
- 96.63692877337894,
- 153.62093095799207,
- 156.05800033315097,
- 9.587609950939838,
- 140.09721428165886,
- 134.898750425008,
- 8.652809034763463,
- 8.989448046931262,
- 107.64260577858933,
- 9.825071080298192,
- 150.6237132142087,
- 143.76058852986372,
- 154.01627264735168,
- 140.85322298632985,
- 143.63714834446708,
- 149.7259575806535,
- 8.53942846683121,
- 157.02635815805976,
- 150.83913162907433,
- 154.0283691261865,
- 9.246842209481716,
- 154.5851361854829,
- 133.4662155767381,
- 137.55396410787307,
- 105.77910782321499,
- 148.97953057255376,
- 111.3041581371634,
- 9.543858351726714,
- 142.71996301994741,
- 144.2417836324451,
- 148.5293262803374,
- 8.95331376662564,
- 105.2724164655814,
- 149.16646109060707,
- 151.1947852118465,
- 9.503293907683512,
- 133.40055362812345,
- 8.776394391795916,
- 148.3675722527084,
- 154.66946641450528,
- 122.71674068416665,
- 149.62192317697068,
- 153.40159484208397,
- 9.46860898864519,
- 146.10526710538994,
- 143.96020057925128,
- 8.62472208077336,
- 8.906885562515198,
- 105.7754218686014,
- 150.17957794387223,
- 144.0451331512576,
- 149.95461039551162,
- 151.46311089131117,
- 142.22104279807664,
- 147.3679944003333,
- 140.5394711174869,
- 123.62157744638432,
- 152.32796921399395,
- 156.6603241829257,
- 9.43621164630811,
- 158.2241383954169,
- 149.33346139426692,
- 144.12074054746773,
- 143.1977521817863,
- 8.536662624511228,
- 9.785635570067782,
- 147.61880087321424,
- 9.402323265876474,
- 159.1161790596516,
- 146.56796834276156,
- 147.64890403285438,
- 157.70847517328534,
- 114.64282143770687,
- 148.5000942425868,
- 10.052761003641129,
- 147.38801074409378
+ 98.47864949895008,
+ 63.93792629897559,
+ 166.49088904974073,
+ 148.10611103663214,
+ 136.93608898138933,
+ 153.87586308063382,
+ 90.56559317052603,
+ 128.5291550251628,
+ 162.07670305023993,
+ 4.196475118529487,
+ 147.98743190294235,
+ 149.72190006929446,
+ 1.1777631788022311,
+ 133.74963259040626,
+ 150.11088322452974,
+ 51.863180020864455,
+ 4.139051494405947,
+ 79.2557164919149,
+ 1.6071996867452278,
+ 70.01915930069646,
+ 137.26891673137558,
+ 1.0402098481802287,
+ 1.8594022431966566,
+ 2.039486534010741,
+ 146.2938256177694,
+ 4.149796716964247,
+ 46.34667799086249,
+ 151.47361823216394,
+ 137.54739677623354,
+ 51.120748066850325,
+ 136.84512611150544,
+ 32.11962977236786,
+ 157.56752902839474,
+ 47.12119148820226,
+ 145.7314367353006,
+ 42.20270560372231,
+ 1.0426098595499007,
+ 3.5892682955617827,
+ 76.57100636536596,
+ 1.612496526198,
+ 2.6881979572654413,
+ 111.88402006134972,
+ 45.58338247702666,
+ 111.4111889571842,
+ 132.16301113659247,
+ 161.64295403385984,
+ 2.664705818704618,
+ 157.1638935590632,
+ 25.286871922093454,
+ 37.4310109209181,
+ 153.65911351957632,
+ 170.7256762539797,
+ 1.042128189044151,
+ 3.5869040413041917,
+ 83.30261586197105,
+ 90.55970202339806,
+ 132.9415846015795,
+ 95.80834182322752,
+ 112.4369142570399,
+ 130.7156977512895,
+ 90.98968148626129,
+ 0.9371270459059615,
+ 159.09279181195387,
+ 162.9970081970886,
+ 2.6700708026356366,
+ 1.8557378891084773,
+ 156.12103246797463,
+ 1.3653778104766194,
+ 143.46571269908148,
+ 130.6346250925551,
+ 62.46023289115923,
+ 1.6116060776090406,
+ 139.8111163213305,
+ 34.86018737886305,
+ 146.06865198079345,
+ 133.96801334258495,
+ 1.0417626130871034,
+ 97.53781169320182,
+ 2.0478975910586503,
+ 151.90776052541932,
+ 126.40035137658552,
+ 44.78808603802679,
+ 163.9803901721219,
+ 152.78287546210825,
+ 154.77428093351637,
+ 145.74430748169019,
+ 163.03421864587594,
+ 146.28703545539014,
+ 82.55934081518444,
+ 73.53123347847824,
+ 87.20650201489909,
+ 79.6237289961617,
+ 146.76012425672718,
+ 162.46398331888344,
+ 2.046000130560097,
+ 104.11707807083185,
+ 142.7981951169222,
+ 45.781111784259096,
+ 164.13498801895528,
+ 93.34392878508068,
+ 127.09756182184553,
+ 0.9369885821746623,
+ 2.0440080852076448,
+ 1.6107470231739485,
+ 149.4484511068655,
+ 87.5539915318001,
+ 1.3670348174101508,
+ 1.1796264961520015,
+ 142.53546263417087,
+ 150.2065859393766,
+ 145.65883203776818,
+ 142.2125733485302,
+ 96.99016545580078,
+ 57.32416740237564,
+ 106.63530054957698,
+ 159.19142654590536,
+ 1.0415326032228118,
+ 98.71719677010607,
+ 106.73175053259962,
+ 1.6100826372227688,
+ 146.64805335844048,
+ 72.59518577946031,
+ 142.34132184480842,
+ 85.94240702745647,
+ 126.17687901514078,
+ 135.7696701691411,
+ 29.62308081982307,
+ 148.2421144346034,
+ 130.36261145275355,
+ 53.13931721337651,
+ 60.51160243931191,
+ 141.54695622051943,
+ 73.11803837069677,
+ 137.21251141324606,
+ 148.63844490308944,
+ 62.8404582738594,
+ 45.401831957608,
+ 0.9643006239654945,
+ 147.2298500624911,
+ 151.91506054646217,
+ 140.48716103219812,
+ 0.9577624967779577,
+ 160.06459889404132,
+ 155.2359539910114,
+ 126.59645077786885,
+ 15.69438649059929,
+ 152.80784197867072,
+ 23.527136960081226,
+ 0.9561607658842026,
+ 135.304826702121,
+ 142.47511264536794,
+ 149.8501903787043,
+ 151.43523022097875,
+ 0.9640793717349251,
+ 0.9631519875374979,
+ 145.2950579689095,
+ 104.16937732598902,
+ 131.1708059930721,
+ 144.18743838648734,
+ 143.6919419808989,
+ 145.5428193502994,
+ 0.9638106812588461,
+ 0.9627615573404509,
+ 116.54193238808332,
+ 54.308902955274014,
+ 45.33558667751163,
+ 159.57290743060722,
+ 156.60366994005867,
+ 142.03263718363198,
+ 40.71403223415776,
+ 155.40510615972553,
+ 58.6681100653237,
+ 137.0437576533739,
+ 80.42300690375168,
+ 58.033083103031665,
+ 0.9693871919683402,
+ 145.73573001557583,
+ 60.44621412824422,
+ 54.994288450325136,
+ 88.73692291143061,
+ 0.9559459748869998,
+ 56.08954858644736,
+ 56.31747770886735,
+ 142.34693049846092,
+ 132.51002333480037,
+ 108.96587128971876,
+ 57.39669142091791,
+ 85.1254544103699,
+ 122.1342568773111,
+ 170.14800453897098,
+ 0.9667745869936778,
+ 164.77118206030752,
+ 77.67607540068808,
+ 0.9637172808805204,
+ 159.27278631745818,
+ 93.32941075871183,
+ 114.31154051585622,
+ 0.9577271441482065,
+ 0.9663851340406727,
+ 69.18116638176265,
+ 145.49566595839337,
+ 39.99458755398874,
+ 151.72058228459386,
+ 71.71902007184255,
+ 0.955684788125637,
+ 70.8845735459765,
+ 0.9659986810119839,
+ 26.22947505868186,
+ 149.5122587573231,
+ 62.37088691999424,
+ 0.9626226162613168,
+ 144.16390862207493,
+ 143.18707878361667,
+ 148.34680655358588,
+ 0.9655981786202157,
+ 128.6357514760558,
+ 0.972457638109508,
+ 47.97113131021637,
+ 0.962257594040168,
+ 135.91488529586792,
+ 0.9555101570399641,
+ 139.87244415060783,
+ 161.80374363862717,
+ 102.03749537949356,
+ 119.90228156989667,
+ 95.01508726085196,
+ 0.9618747782794568,
+ 97.04528669323962,
+ 124.83482655795,
+ 0.9575074351185681,
+ 97.4749088017089,
+ 143.04337002379702,
+ 0.9720616869548507,
+ 88.4343283770829,
+ 0.9616266920922193,
+ 104.03159874923712,
+ 102.89124420706305,
+ 140.2496100327507,
+ 143.1710058572335,
+ 101.42975069052237,
+ 128.03336431254732,
+ 85.69336920713639,
+ 0.9613543134449882,
+ 104.07697069101184,
+ 100.02889226751559,
+ 106.63283752921622,
+ 144.57311516379912,
+ 126.07240879815421,
+ 161.55730431091774,
+ 73.12112420438781,
+ 0.9589217273481213,
+ 142.0323058738417,
+ 122.36148204858885,
+ 0.9572538602096321,
+ 112.98246752660035,
+ 142.34355181617389,
+ 41.04230698700827,
+ 8.473685991981666,
+ 170.80637904469666,
+ 142.97081601431356,
+ 140.00938953689527,
+ 1.0308124281925075,
+ 163.68673254202156,
+ 43.76708184183388,
+ 152.25998257998737,
+ 111.67117755812934,
+ 145.80673033340165,
+ 160.967274593742,
+ 121.82423347589321,
+ 151.58970194946951,
+ 43.836717431814456,
+ 168.33474851388928,
+ 152.8971313956712,
+ 72.9024488252911,
+ 21.820779024213074,
+ 1.0392675847166184,
+ 147.87020150991353,
+ 14.897143028689484,
+ 19.847221148151032,
+ 32.431828340180246,
+ 57.7813822991841,
+ 1.0334876773950952,
+ 94.25591710682407,
+ 151.42229388821934,
+ 62.73982551986958,
+ 1.0305004930196628,
+ 33.431851137208405,
+ 162.37672318207316,
+ 50.321107844780045,
+ 120.0631996858246,
+ 45.868384609266045,
+ 150.25509288811767,
+ 1.03641668355906,
+ 82.19687660990678,
+ 158.74432925111145,
+ 1.041876067399849,
+ 1.0459490020450795,
+ 74.46636703262733,
+ 159.72092018884473,
+ 145.89909226306747,
+ 151.4623812014693,
+ 53.96440008638893,
+ 159.793887362778,
+ 148.37554042172758,
+ 83.3128358383083,
+ 1.033330707971675,
+ 134.17516572064534,
+ 146.71192985844118,
+ 1.0352015128775223,
+ 1.030228349427348,
+ 173.4020929881413,
+ 1.0414756431813357,
+ 157.44806749626466,
+ 1.0330400451866075,
+ 1.0430419707188734,
+ 167.82243267657728,
+ 143.8312255273241,
+ 68.13449792020043,
+ 74.35987547428464,
+ 1.0410410061956523,
+ 144.46694632543532,
+ 1.0327651323294085,
+ 150.93003222189313,
+ 1.0391803120976406,
+ 1.0348231697568464,
+ 80.60319434281541,
+ 3.1207628480728475,
+ 151.16210456830606,
+ 1.044348655121621,
+ 1.0324784232146003,
+ 99.42447225407219,
+ 1.038776111100077,
+ 132.7893754958314,
+ 146.8726662885585,
+ 91.5964670484325,
+ 1.0406970130016908,
+ 1.0437330582244273,
+ 42.28479249749239,
+ 162.83839126288393,
+ 151.86715746595317,
+ 140.5094808302986,
+ 170.2080960063118,
+ 131.07684807335298,
+ 88.96862061056908,
+ 163.9922734476757,
+ 44.213460221990154,
+ 157.8010866400773,
+ 1.0382665374856965,
+ 139.57673454433854,
+ 163.7758432408245,
+ 102.99718171708128,
+ 107.60774917922078,
+ 159.16551335735969,
+ 4.119717517454783,
+ 160.5803771988876,
+ 1.0378430568380714,
+ 115.44357851711793,
+ 167.4238211695712,
+ 103.79633528746076,
+ 154.03506418556444,
+ 159.03692094687025,
+ 1.032427282609682,
+ 32.52187142118156,
+ 158.57750457420016,
+ 141.67055142208721,
+ 160.71458938698333,
+ 157.24106314480454,
+ 157.40833384009724,
+ 150.60022387354616,
+ 80.91896448664748,
+ 1.0430666391532655,
+ 160.36671183081978,
+ 1.0347878859497883,
+ 1.030293958907628,
+ 147.50533105226975,
+ 152.4875796332852,
+ 160.31618334728296,
+ 1.0321960030040243,
+ 156.27786873980907,
+ 1.0375321120324796,
+ 160.4885833961135,
+ 111.93639192506156,
+ 172.24078944530834,
+ 145.3287404427809,
+ 1.0880735082543522,
+ 0.7878037099331565,
+ 1.0864480413552253,
+ 158.40272521901554,
+ 155.28074693629694,
+ 87.44836891077435,
+ 155.54752700738993,
+ 8.411714256180034,
+ 19.862348977650086,
+ 18.35501539895094,
+ 163.43115890247273,
+ 157.8836387689617,
+ 143.68115882020365,
+ 13.66284888141665,
+ 160.7292101444063,
+ 155.01427847930626,
+ 150.31432418581997,
+ 60.81928120084204,
+ 145.3926688034953,
+ 145.30123372502598,
+ 144.98393507215505,
+ 35.18970147025731,
+ 153.82777107784506,
+ 164.23228082777166,
+ 145.88278452124027,
+ 20.46954502286418,
+ 162.0360370063431,
+ 150.43884956663888,
+ 142.41966677764808,
+ 53.07266306010992,
+ 93.50532435009316,
+ 150.1523142285131,
+ 152.33361454488718,
+ 0.787209685332213,
+ 159.9704569183677,
+ 147.66926829001207,
+ 116.31853611522087,
+ 1.0774618364125428,
+ 164.22843982362895,
+ 103.98183305676696,
+ 152.52952151222078,
+ 90.29170862480086,
+ 1.0862563048060565,
+ 118.53710658997939,
+ 90.19968385647951,
+ 1.0770089089852286,
+ 59.61890934626195,
+ 134.6160499563656,
+ 147.6477708991394,
+ 0.7870687303401608,
+ 171.47874197919785,
+ 165.99226887272076,
+ 83.5080960308232,
+ 151.55871514895225,
+ 154.9605789451006,
+ 154.1866343413245,
+ 152.69380076313175,
+ 78.46281024467942,
+ 165.86076250975873,
+ 74.6681179766703,
+ 1.0816751050475706,
+ 1.0766059511099162,
+ 1.091025249207128,
+ 151.61539901543878,
+ 165.44997737983917,
+ 41.75139614518547,
+ 7.388178711598297,
+ 1.0848156120039962,
+ 121.93333712957133,
+ 1.0761843006794773,
+ 1.0905643992997778,
+ 1.075801598924969,
+ 151.57738041471748,
+ 87.38815331117043,
+ 154.57766374016802,
+ 153.3353461131615,
+ 81.63500323812801,
+ 153.88446167160095,
+ 1.0900521500553328,
+ 151.65017721794743,
+ 118.01864188919838,
+ 40.91238161739305,
+ 1.0860502574663193,
+ 103.72384951664927,
+ 1.081356861209966,
+ 97.70962808524236,
+ 153.30715221364136,
+ 1.0754011583086598,
+ 149.80888083526256,
+ 0.7870161596702333,
+ 95.11588780527678,
+ 1.0824954483404,
+ 159.0909827809553,
+ 176.4607736857684,
+ 160.28483143240214,
+ 108.14616986068252,
+ 150.64495962435973,
+ 49.52814184554448,
+ 152.62988882612356,
+ 161.40766773375927,
+ 1.0809227984149974,
+ 150.0601857860385,
+ 156.59538854909297,
+ 1.072689949598873,
+ 152.81205676706514,
+ 0.7868728895290079,
+ 1.0857058881477388,
+ 143.4694111503961,
+ 159.8022996153893,
+ 144.9300712596306,
+ 1.089757442067835,
+ 160.11340438331118,
+ 132.79626776787333,
+ 50.38448421210805,
+ 162.42137561579725,
+ 5.284417747700096,
+ 1.0805116052247719,
+ 145.73004732672527,
+ 152.59775665509528,
+ 151.63963715309214,
+ 155.59850627759238,
+ 104.41906641764095,
+ 169.89843638971865,
+ 158.37348320912855,
+ 1.0800687750785642,
+ 149.5543247935483,
+ 156.60712632191078,
+ 159.6236209903005,
+ 163.09782416725415,
+ 98.6328505039743,
+ 53.85030009718123,
+ 61.00364034342645,
+ 142.05505100830447,
+ 16.614192215593924,
+ 16.582992843952567,
+ 154.47389623241062,
+ 150.9101058615698,
+ 90.42581449278116,
+ 159.53144787295545,
+ 1.1253578624639393,
+ 38.131573465314304,
+ 163.695564516746,
+ 1.1316048014866884,
+ 1.1159054012388119,
+ 152.5411314388352,
+ 111.46983099035936,
+ 168.09092507016115,
+ 36.13058934697122,
+ 1.1197910040154087,
+ 142.05200673526159,
+ 78.09074458708291,
+ 157.63502242964265,
+ 162.03218881710688,
+ 80.0426703374817,
+ 164.26384362727924,
+ 1.1222030060702506,
+ 123.66591496581279,
+ 35.97653651285592,
+ 112.29012034978103,
+ 62.69199102131731,
+ 54.806250360805244,
+ 25.5070616004963,
+ 187.35211092519995,
+ 1.1217003700976045,
+ 145.32823111763997,
+ 145.9166945337544,
+ 1.1301150192515073,
+ 1.1155615329029929,
+ 154.1440872758632,
+ 88.5586247200791,
+ 161.60021419086345,
+ 1.121175594981433,
+ 1.1194211460505468,
+ 1.1184405197027008,
+ 17.60883897305572,
+ 174.5134372600641,
+ 160.45245655990746,
+ 0.8166461657826791,
+ 160.30564706046655,
+ 75.44218827386376,
+ 108.54547521267394,
+ 150.49806131791814,
+ 153.04150189313873,
+ 150.40965861420275,
+ 125.63958433236749,
+ 103.12983995128599,
+ 164.17811633308784,
+ 175.52459662743908,
+ 121.09400696724566,
+ 1.1180201884652679,
+ 166.27365155489332,
+ 76.42072368500718,
+ 146.90227613796094,
+ 110.70803654586257,
+ 171.79379505267624,
+ 158.67043375351244,
+ 147.76280504628218,
+ 1.1175125336867027,
+ 156.89279233182117,
+ 158.0652757498143,
+ 3.343340016597665,
+ 49.779892185016756,
+ 173.36352621939335,
+ 162.4424006508065,
+ 49.49838297370054,
+ 173.86161362836785,
+ 128.03796900006384,
+ 155.68412076198788,
+ 137.87250806830016,
+ 0.8165665367853991,
+ 1.1298869482124425,
+ 159.26492424008396,
+ 144.56503533715272,
+ 120.55988523349636,
+ 103.3722869693168,
+ 93.5099865200851,
+ 1.1209786631771586,
+ 77.46613714395933,
+ 153.76092950699294,
+ 154.6841596167678,
+ 146.17966014780984,
+ 1.1171782471429414,
+ 172.55763339822,
+ 174.99117233418923,
+ 157.46750414970307,
+ 0.816424346577868,
+ 1.1274076620999394,
+ 157.20421311127953,
+ 148.14748951821153,
+ 149.14697533706817,
+ 158.95389608842163,
+ 107.97531407241593,
+ 151.43640801793904,
+ 0.8162494126902972,
+ 157.001545737823,
+ 163.80848036600747,
+ 158.89222886851297,
+ 147.3506488140666,
+ 1.1133445391411512,
+ 153.68284200756125,
+ 151.8834177926471,
+ 0.8160708323289537,
+ 136.43010052273473,
+ 162.61423354524993,
+ 133.02570532111102,
+ 137.86961562609895,
+ 133.04901735700332,
+ 154.9473181767413,
+ 123.93507737689346,
+ 50.83204611520686,
+ 1.1270195451857552,
+ 161.44093109510388,
+ 1.1195708009057284,
+ 169.64321510449827,
+ 129.6089117511605,
+ 1.115946234318508,
+ 60.34621183821726,
+ 101.29881161208688,
+ 160.90062346193574,
+ 68.21783931047266,
+ 154.41899008326143,
+ 174.07515811573973,
+ 159.677356250512,
+ 159.67728671666873,
+ 9.799978913114145,
+ 94.27732771999344,
+ 9.60214441506233,
+ 9.392617132404062,
+ 155.0463449410919,
+ 71.59183194783785,
+ 87.06866691125934,
+ 157.13349078706932,
+ 9.759436169606595,
+ 110.44611293008246,
+ 171.1626230380253,
+ 89.85437363374635,
+ 107.09248087440588,
+ 126.74466225447065,
+ 10.009602057141537,
+ 9.177527712733529,
+ 99.62101604875475,
+ 102.08957950312852,
+ 99.71118980213345,
+ 175.89684251359242,
+ 10.182586030301673,
+ 171.66004511817064,
+ 148.24171173832124,
+ 164.5397331583309,
+ 158.71440804719356,
+ 86.55832242496149,
+ 148.610396831239,
+ 9.368509685917438,
+ 10.136730874821687,
+ 173.75231796226313,
+ 168.18072479771067,
+ 125.24195815296933,
+ 151.26149869648452,
+ 130.6197551882794,
+ 174.23395009631983,
+ 170.65779238484487,
+ 148.1296912550562,
+ 131.11524857886738,
+ 177.99920893337523,
+ 167.5808938510404,
+ 158.60603057794222,
+ 93.6097533900039,
+ 9.587874811966838,
+ 9.33150536695352,
+ 141.2149869829261,
+ 117.88939818622781,
+ 133.45305575288236,
+ 156.7555665933833,
+ 166.2992810974147,
+ 8.762060933047495,
+ 147.60747975090285,
+ 125.39702986854361,
+ 126.29551477783566,
+ 133.3684883476696,
+ 169.84463465109542,
+ 9.160889914093532,
+ 9.75005007182584,
+ 91.25897804548956,
+ 171.15603143396729,
+ 137.11852945151446,
+ 119.70724002664221,
+ 157.24098320319794,
+ 144.12095644229885,
+ 131.6771710258767,
+ 164.00686483698965,
+ 120.71707004833677,
+ 9.560442320047777,
+ 9.299425721987362,
+ 147.15785637439873,
+ 170.64643820040646,
+ 181.465984660646,
+ 9.098182272291353,
+ 131.1874185050373,
+ 100.18931014367688,
+ 166.410568062446,
+ 135.47929425317378,
+ 151.28962080931584,
+ 169.34032285811423,
+ 24.163402926519016,
+ 130.3951109594527,
+ 133.85939391500654,
+ 91.24306358260182,
+ 183.98754016151273,
+ 9.265911045247684,
+ 147.14244062731618,
+ 165.66255588662568,
+ 10.016411965833509,
+ 9.03577437369573,
+ 9.70728564931857,
+ 122.8213056543772,
+ 9.533743128327513,
+ 143.45968503667223,
+ 155.32709571771161,
+ 141.06113578797667,
+ 145.47889938004263,
+ 167.35960747366406,
+ 138.12559014567552,
+ 116.75045269404782,
+ 9.4953352412109,
+ 170.07468770066882,
+ 172.07629747140533,
+ 155.39552706715028,
+ 9.96413703689447,
+ 144.65169143749998,
+ 169.142417216155,
+ 112.76319305930042,
+ 166.30777737368877,
+ 123.90774653996388,
+ 132.11710295459207,
+ 8.76790539542995,
+ 9.923343461828647,
+ 8.972068632607057,
+ 152.30472233633313,
+ 99.16466897297458,
+ 147.39899220637375,
+ 167.5046285318718,
+ 158.30798003347417,
+ 176.8098098029006,
+ 169.2000502496997,
+ 8.908205534006084,
+ 147.04973272590675,
+ 6.01978171115786,
+ 12.908947280828421,
+ 161.20885865837164,
+ 154.1041738397025,
+ 12.160112764259807,
+ 183.9484777068351,
+ 13.885015446203202,
+ 103.27604069377547,
+ 68.56270954501308,
+ 153.39985703870556,
+ 188.5641680250544,
+ 151.39232245655768,
+ 12.050089294787492,
+ 183.13047361941102,
+ 114.09672566233004,
+ 109.88264169611061,
+ 118.56400136868983,
+ 130.5787804713655,
+ 131.9836940557652,
+ 139.4770525169641,
+ 172.40959805680149,
+ 153.8901427211502,
+ 13.813000129286806,
+ 115.17874112168954,
+ 106.36053561017184,
+ 174.23315480590185,
+ 169.50614560985875,
+ 163.7261937236369,
+ 11.957456410326769,
+ 13.191395790527517,
+ 152.6369175652841,
+ 164.00689931377138,
+ 124.1532871601288,
+ 158.7919901602378,
+ 126.3012920481913,
+ 110.01300143579287,
+ 166.51966455859474,
+ 161.16730547199728,
+ 137.84358628055278,
+ 123.59630141121379,
+ 109.08989919709578,
+ 113.66676604314083,
+ 150.33107775824936,
+ 155.76683850736808,
+ 180.51837524079605,
+ 172.49809361722134,
+ 171.2412543685433,
+ 146.52428847969958,
+ 125.58622347928333,
+ 132.3599749727434,
+ 14.245461215559237,
+ 11.847782329285673,
+ 127.588030395774,
+ 169.03076884237493,
+ 160.74766094154035,
+ 141.23866796872034,
+ 111.11477769019474,
+ 12.898248376303878,
+ 164.38673745815677,
+ 11.791663338710885,
+ 148.00296428763687,
+ 140.16323874251623,
+ 13.719781371654578,
+ 14.365561456573998,
+ 89.32211257795143,
+ 135.76622159161508,
+ 175.86032158817434,
+ 128.11591032818185,
+ 141.79940543502275,
+ 13.157166878859636,
+ 176.72190145631947,
+ 146.35619986228915,
+ 98.02869268663022,
+ 12.811778712246966,
+ 178.01632978541917,
+ 11.747222913476566,
+ 173.95822172954252,
+ 172.47660061508643,
+ 13.568556768695913,
+ 135.1198744591959,
+ 122.01181780569887,
+ 165.54722192942938,
+ 176.91918611654273,
+ 135.48421254380435,
+ 152.73279297531656,
+ 183.54215600068494,
+ 94.73349204436757,
+ 165.3454353780521,
+ 84.12230571074015,
+ 12.73103339619439,
+ 14.20676756417383,
+ 140.07559949201985,
+ 145.7554344839868,
+ 148.14304437101455,
+ 144.7060493293736,
+ 173.19895239158285,
+ 107.2396185797313,
+ 12.648044488473259,
+ 173.48665402770794,
+ 161.86284234640354,
+ 144.49958539317737,
+ 183.33130603616738,
+ 149.75316477343017,
+ 153.29421953478465,
+ 112.95288962968242,
+ 12.55136585792316,
+ 173.4614521532605,
+ 136.4085114015674,
+ 173.79337782013562,
+ 186.83123762499903,
+ 13.449155280150386,
+ 155.12272657027916,
+ 108.84862656043424,
+ 150.6559527232612,
+ 161.90374448992205,
+ 169.6874597897037,
+ 185.13622778245175,
+ 13.139280888748093,
+ 148.81997444276612,
+ 162.826727139871,
+ 134.34831771089154,
+ 12.478143605322522,
+ 14.14151231689335,
+ 149.25750191310448,
+ 167.44106770036936,
+ 170.90279518575983,
+ 157.84394143590183,
+ 8.296884066877869,
+ 7.386407378393029,
+ 8.177010477741181,
+ 60.00030364994894,
+ 137.35670186784466,
+ 151.41307554547254,
+ 150.53265674110258,
+ 141.36182090288565,
+ 154.42392832445645,
+ 14.407560995301617,
+ 113.05280253165802,
+ 8.771319013508563,
+ 7.756832533799784,
+ 7.915167569814742,
+ 172.68555416184375,
+ 9.246590778625794,
+ 112.20973585271739,
+ 7.374925625154626,
+ 111.78749154901601,
+ 119.95753341645725,
+ 154.77722687049408,
+ 7.888580292543184,
+ 149.23559365306315,
+ 175.52342653145377,
+ 158.75097413261327,
+ 114.24446296440473,
+ 167.67413927012774,
+ 139.92437779140218,
+ 153.0991583611961,
+ 159.7319334713746,
+ 175.77990646480632,
+ 175.13373633806003,
+ 16.006580912678864,
+ 7.364167548538875,
+ 8.116426613758023,
+ 153.43476931019558,
+ 172.67401521610824,
+ 137.25039229504623,
+ 23.94869767384389,
+ 175.71290886984852,
+ 160.1562681126053,
+ 7.350730708586878,
+ 168.18537884347361,
+ 177.44645900467552,
+ 144.69151322813394,
+ 7.870439881886282,
+ 158.459275555328,
+ 137.04634114797315,
+ 8.28967641118504,
+ 89.24830243345173,
+ 138.72903724038372,
+ 144.82472911115988,
+ 132.17749274525417,
+ 174.5610183503014,
+ 187.22444190737485,
+ 9.045633413519324,
+ 144.6870829429866,
+ 94.13484353638168,
+ 8.073564944014072,
+ 8.763589893125177,
+ 144.5449141719037,
+ 7.849459359846659,
+ 172.45229931306682,
+ 148.0354241542905,
+ 164.34364023912008,
+ 96.62180529545114,
+ 168.59818307908336,
+ 164.06742901634536,
+ 7.6595131274639785,
+ 182.08298206042065,
+ 174.01829936632802,
+ 168.2984620634042,
+ 8.26924767633141,
+ 127.74401148092224,
+ 152.9540589411171,
+ 149.70999922953388,
+ 138.05370099020382,
+ 163.98282164395957,
+ 109.96934554907047,
+ 8.996791001407733,
+ 144.24233837720223,
+ 98.41465480385448,
+ 176.36506372732458,
+ 8.593325518249502,
+ 139.963285147378,
+ 165.79681947035346,
+ 175.45780184642518,
+ 8.96329041536684,
+ 125.14956588858662,
+ 7.352306186940013,
+ 177.70817869555052,
+ 177.69694242040705,
+ 165.7171562780864,
+ 159.43363801309405,
+ 181.71342376901586,
+ 8.928314868453635,
+ 147.12698777390037,
+ 98.19159288935101,
+ 8.060910754944894,
+ 8.549652936388815,
+ 141.01923634310606,
+ 163.73524549575265,
+ 160.94825111954881,
+ 163.89847325356007,
+ 158.8883048029801,
+ 101.49136858702906,
+ 175.66290061319754,
+ 149.61494347618603,
+ 167.11717553963226,
+ 172.02372197225566,
+ 187.6631035218658,
+ 8.900520682145078,
+ 171.61286914605415,
+ 115.54311347996163,
+ 169.3438620700623,
+ 158.53427223473756,
+ 7.62769842231285,
+ 7.851152107489017,
+ 166.35296262059944,
+ 8.869163971328895,
+ 174.27048762162556,
+ 105.1060664928901,
+ 176.44597164262635,
+ 183.93578989094215,
+ 155.56348841330345,
+ 165.030948332619,
+ 24.021020986288374,
+ 158.54498277925777
]
-}
+}
\ No newline at end of file
diff --git a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/model_config.yaml
index aa4fde5e512..4b3fe856099 100644
--- a/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/model_config.yaml
@@ -44,7 +44,8 @@ MODEL_ARGS:
--inference-dynamic-batching-buffer-size-gb: 20
--inference-dynamic-batching-cuda-graph-max-tokens: 2048
--cuda-graph-impl: local
- --cuda-graph-scope: full
+ --cuda-graph-scope: full_iteration
+ --no-check-for-nan-in-loss-and-grad: true
--disable-chunked-prefill: true
--dist-ckpt-strictness: log_unexpected
--inference-ckpt-non-strict: true # To handle the extra_state errors
diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_h100.json
index 0d556450cec..40700470348 100644
--- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_h100.json
+++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_h100.json
@@ -6,54 +6,54 @@
"values": {
"1": 10.92671,
"2": 10.91589,
- "3": 10.92552,
- "4": 10.93168,
- "5": 10.93015,
- "6": 10.9259,
- "7": 10.92646,
- "8": 10.92323,
- "9": 10.92778,
- "10": 10.9168,
- "11": 10.9178,
- "12": 10.92446,
- "13": 10.90961,
- "14": 10.90627,
- "15": 10.90112,
- "16": 10.88691,
- "17": 10.88827,
- "18": 10.88554,
- "19": 10.88654,
- "20": 10.8377,
- "21": 10.82717,
- "22": 10.81535,
- "23": 10.80831,
- "24": 10.78061,
- "25": 10.77774,
- "26": 10.76115,
- "27": 10.7495,
- "28": 10.6922,
- "29": 10.66686,
- "30": 10.63118,
- "31": 10.62182,
- "32": 10.61591,
- "33": 10.57843,
- "34": 10.54531,
- "35": 10.54625,
- "36": 10.53479,
- "37": 10.50533,
- "38": 10.50383,
- "39": 10.47322,
- "40": 10.45095,
- "41": 10.42606,
- "42": 10.41475,
- "43": 10.40064,
- "44": 10.37006,
- "45": 10.38168,
- "46": 10.33484,
- "47": 10.32444,
- "48": 10.28749,
- "49": 10.28608,
- "50": 10.27697
+ "3": 10.92569,
+ "4": 10.93204,
+ "5": 10.93027,
+ "6": 10.9261,
+ "7": 10.92637,
+ "8": 10.92388,
+ "9": 10.92728,
+ "10": 10.91588,
+ "11": 10.9183,
+ "12": 10.92402,
+ "13": 10.90967,
+ "14": 10.90628,
+ "15": 10.90098,
+ "16": 10.88556,
+ "17": 10.88818,
+ "18": 10.88475,
+ "19": 10.88523,
+ "20": 10.83769,
+ "21": 10.82735,
+ "22": 10.81478,
+ "23": 10.80877,
+ "24": 10.78047,
+ "25": 10.77776,
+ "26": 10.76048,
+ "27": 10.74912,
+ "28": 10.69225,
+ "29": 10.66725,
+ "30": 10.63087,
+ "31": 10.62053,
+ "32": 10.61533,
+ "33": 10.57791,
+ "34": 10.5462,
+ "35": 10.546,
+ "36": 10.53423,
+ "37": 10.50445,
+ "38": 10.50363,
+ "39": 10.47181,
+ "40": 10.44914,
+ "41": 10.42508,
+ "42": 10.41306,
+ "43": 10.39862,
+ "44": 10.36841,
+ "45": 10.37966,
+ "46": 10.33291,
+ "47": 10.32241,
+ "48": 10.28472,
+ "49": 10.28374,
+ "50": 10.27437
}
},
"num-zeros": {
@@ -61,56 +61,56 @@
"end_step": 50,
"step_interval": 1,
"values": {
- "1": 19058.0,
- "2": 19206.0,
- "3": 18972.0,
- "4": 19416.0,
- "5": 19009.0,
- "6": 18538.0,
- "7": 18981.0,
- "8": 18448.0,
- "9": 18864.0,
- "10": 19655.0,
- "11": 19064.0,
- "12": 18696.0,
- "13": 19292.0,
- "14": 19140.0,
- "15": 18806.0,
- "16": 18590.0,
- "17": 18993.0,
- "18": 19173.0,
- "19": 19321.0,
- "20": 19057.0,
- "21": 19086.0,
- "22": 18997.0,
- "23": 18891.0,
- "24": 19267.0,
- "25": 18711.0,
- "26": 19139.0,
- "27": 19114.0,
- "28": 18818.0,
- "29": 18371.0,
- "30": 18304.0,
- "31": 19016.0,
- "32": 19184.0,
- "33": 18481.0,
- "34": 18592.0,
- "35": 18848.0,
- "36": 18346.0,
- "37": 18564.0,
- "38": 18516.0,
- "39": 18959.0,
- "40": 19194.0,
- "41": 18945.0,
- "42": 18455.0,
- "43": 19053.0,
- "44": 18809.0,
- "45": 20372.0,
- "46": 19563.0,
- "47": 19974.0,
- "48": 20047.0,
- "49": 21674.0,
- "50": 20259.0
+ "1": 36710.0,
+ "2": 37160.0,
+ "3": 37309.0,
+ "4": 36783.0,
+ "5": 36803.0,
+ "6": 36142.0,
+ "7": 36370.0,
+ "8": 36176.0,
+ "9": 37100.0,
+ "10": 37917.0,
+ "11": 36745.0,
+ "12": 35709.0,
+ "13": 37084.0,
+ "14": 37775.0,
+ "15": 36119.0,
+ "16": 36038.0,
+ "17": 36700.0,
+ "18": 37055.0,
+ "19": 36638.0,
+ "20": 36735.0,
+ "21": 36231.0,
+ "22": 36562.0,
+ "23": 37065.0,
+ "24": 37204.0,
+ "25": 35891.0,
+ "26": 36983.0,
+ "27": 36795.0,
+ "28": 36205.0,
+ "29": 36207.0,
+ "30": 35575.0,
+ "31": 36467.0,
+ "32": 37374.0,
+ "33": 35909.0,
+ "34": 35933.0,
+ "35": 36426.0,
+ "36": 36062.0,
+ "37": 36347.0,
+ "38": 35710.0,
+ "39": 36968.0,
+ "40": 37362.0,
+ "41": 36161.0,
+ "42": 36032.0,
+ "43": 37353.0,
+ "44": 37025.0,
+ "45": 39544.0,
+ "46": 38629.0,
+ "47": 38810.0,
+ "48": 38612.0,
+ "49": 42460.0,
+ "50": 39729.0
}
},
"mem-allocated-bytes": {
@@ -120,54 +120,54 @@
"values": {
"1": 1027089408.0,
"2": 1027091968.0,
- "3": 1027088384.0,
- "4": 1027088384.0,
+ "3": 1027087360.0,
+ "4": 1027088896.0,
"5": 1027090944.0,
- "6": 1027091968.0,
- "7": 1027088896.0,
- "8": 1027092992.0,
- "9": 1027090944.0,
- "10": 1027090432.0,
- "11": 1027090944.0,
- "12": 1027091456.0,
+ "6": 1027090944.0,
+ "7": 1027088384.0,
+ "8": 1027092480.0,
+ "9": 1027091968.0,
+ "10": 1027091456.0,
+ "11": 1027090432.0,
+ "12": 1027090944.0,
"13": 1027091968.0,
- "14": 1027090944.0,
- "15": 1027091456.0,
- "16": 1027088384.0,
- "17": 1027088896.0,
+ "14": 1027094528.0,
+ "15": 1027090432.0,
+ "16": 1027088896.0,
+ "17": 1027089408.0,
"18": 1027090944.0,
- "19": 1027089920.0,
- "20": 1027089920.0,
+ "19": 1027088896.0,
+ "20": 1027090944.0,
"21": 1027092480.0,
- "22": 1027088896.0,
- "23": 1027094528.0,
+ "22": 1027090944.0,
+ "23": 1027093504.0,
"24": 1027091968.0,
"25": 1027091456.0,
- "26": 1027089408.0,
+ "26": 1027090944.0,
"27": 1027087872.0,
- "28": 1027091456.0,
+ "28": 1027092992.0,
"29": 1027090432.0,
- "30": 1027089920.0,
+ "30": 1027090432.0,
"31": 1027089408.0,
"32": 1027094528.0,
"33": 1027094016.0,
- "34": 1027092480.0,
- "35": 1027086848.0,
- "36": 1027088384.0,
+ "34": 1027093504.0,
+ "35": 1027085824.0,
+ "36": 1027087872.0,
"37": 1027088896.0,
- "38": 1027090432.0,
- "39": 1027090432.0,
+ "38": 1027089920.0,
+ "39": 1027089920.0,
"40": 1027091456.0,
- "41": 1027088896.0,
+ "41": 1027089920.0,
"42": 1027088384.0,
"43": 1027088896.0,
- "44": 1027091968.0,
- "45": 1027091456.0,
- "46": 1027085824.0,
+ "44": 1027090944.0,
+ "45": 1027091968.0,
+ "46": 1027084800.0,
"47": 1027089920.0,
"48": 1027088384.0,
- "49": 1027086848.0,
- "50": 1027089920.0
+ "49": 1027086336.0,
+ "50": 1027089408.0
}
},
"mem-max-allocated-bytes": {
@@ -182,49 +182,49 @@
"5": 3298735616.0,
"6": 3298735616.0,
"7": 3298735616.0,
- "8": 3299022336.0,
- "9": 3299022336.0,
- "10": 3299022336.0,
- "11": 3299138048.0,
- "12": 3299138048.0,
- "13": 3299225088.0,
- "14": 3299394048.0,
- "15": 3299394048.0,
- "16": 3299394048.0,
- "17": 3299394048.0,
- "18": 3299394048.0,
- "19": 3299809792.0,
- "20": 3299809792.0,
- "21": 3299809792.0,
- "22": 3299809792.0,
- "23": 3300119552.0,
- "24": 3300119552.0,
- "25": 3300119552.0,
- "26": 3300119552.0,
- "27": 3300119552.0,
- "28": 3300119552.0,
- "29": 3300119552.0,
- "30": 3300119552.0,
- "31": 3300119552.0,
- "32": 3300119552.0,
- "33": 3300119552.0,
- "34": 3300416512.0,
- "35": 3300416512.0,
- "36": 3300416512.0,
- "37": 3300416512.0,
- "38": 3300416512.0,
- "39": 3300416512.0,
- "40": 3300416512.0,
- "41": 3300416512.0,
- "42": 3300416512.0,
- "43": 3300416512.0,
- "44": 3300416512.0,
- "45": 3300416512.0,
- "46": 3300416512.0,
- "47": 3300416512.0,
- "48": 3300416512.0,
- "49": 3300416512.0,
- "50": 3300416512.0
+ "8": 3299167232.0,
+ "9": 3299167232.0,
+ "10": 3299167232.0,
+ "11": 3299167232.0,
+ "12": 3299167232.0,
+ "13": 3299167232.0,
+ "14": 3300327424.0,
+ "15": 3300327424.0,
+ "16": 3300327424.0,
+ "17": 3300327424.0,
+ "18": 3300327424.0,
+ "19": 3300327424.0,
+ "20": 3300327424.0,
+ "21": 3300327424.0,
+ "22": 3300327424.0,
+ "23": 3300327424.0,
+ "24": 3300327424.0,
+ "25": 3300327424.0,
+ "26": 3300327424.0,
+ "27": 3300327424.0,
+ "28": 3300327424.0,
+ "29": 3300327424.0,
+ "30": 3300327424.0,
+ "31": 3300327424.0,
+ "32": 3301105664.0,
+ "33": 3301105664.0,
+ "34": 3301105664.0,
+ "35": 3301105664.0,
+ "36": 3301105664.0,
+ "37": 3301105664.0,
+ "38": 3301105664.0,
+ "39": 3301105664.0,
+ "40": 3301105664.0,
+ "41": 3301105664.0,
+ "42": 3301105664.0,
+ "43": 3301105664.0,
+ "44": 3301105664.0,
+ "45": 3301105664.0,
+ "46": 3301105664.0,
+ "47": 3301105664.0,
+ "48": 3301105664.0,
+ "49": 3301105664.0,
+ "50": 3301105664.0
}
},
"iteration-time": {
@@ -233,55 +233,55 @@
"step_interval": 1,
"values": {
"1": "nan",
- "2": 7.89786,
- "3": 0.24594,
- "4": 0.22051,
- "5": 0.222,
- "6": 0.21926,
- "7": 0.2078,
- "8": 0.21045,
- "9": 0.20823,
- "10": 0.20524,
- "11": 0.21966,
- "12": 0.20488,
- "13": 0.2063,
- "14": 0.21021,
- "15": 0.20599,
- "16": 0.20609,
- "17": 0.2069,
- "18": 0.20307,
- "19": 0.20538,
- "20": 0.20251,
- "21": 0.21207,
- "22": 0.20145,
- "23": 0.21042,
- "24": 0.21155,
- "25": 0.2081,
- "26": 0.20377,
- "27": 0.20504,
- "28": 0.20365,
- "29": 0.20313,
- "30": 0.20266,
- "31": 0.20257,
- "32": 0.20336,
- "33": 0.20201,
- "34": 0.20295,
- "35": 0.20399,
- "36": 0.20262,
- "37": 0.2058,
- "38": 0.20263,
- "39": 0.20527,
- "40": 0.20348,
- "41": 0.20601,
- "42": 0.20448,
- "43": 0.20532,
- "44": 0.20505,
- "45": 0.20908,
- "46": 0.2037,
- "47": 0.21773,
- "48": 0.20684,
- "49": 0.20825,
- "50": 0.20302
+ "2": 5.35585,
+ "3": 0.30738,
+ "4": 0.3051,
+ "5": 0.29962,
+ "6": 0.29439,
+ "7": 0.28971,
+ "8": 0.29154,
+ "9": 0.2896,
+ "10": 0.285,
+ "11": 0.28601,
+ "12": 0.28351,
+ "13": 0.28073,
+ "14": 0.28692,
+ "15": 0.28298,
+ "16": 0.28931,
+ "17": 0.28692,
+ "18": 0.28464,
+ "19": 0.2809,
+ "20": 0.2801,
+ "21": 0.29964,
+ "22": 0.28577,
+ "23": 0.29322,
+ "24": 0.28538,
+ "25": 0.28139,
+ "26": 0.28632,
+ "27": 0.28307,
+ "28": 0.28328,
+ "29": 0.2898,
+ "30": 0.28102,
+ "31": 0.28581,
+ "32": 0.29226,
+ "33": 0.28565,
+ "34": 0.28151,
+ "35": 0.28469,
+ "36": 0.28547,
+ "37": 0.28361,
+ "38": 0.28658,
+ "39": 0.28216,
+ "40": 0.28637,
+ "41": 0.28332,
+ "42": 0.28626,
+ "43": 0.28098,
+ "44": 0.28017,
+ "45": 0.28351,
+ "46": 0.2833,
+ "47": 0.27921,
+ "48": 0.2816,
+ "49": 0.28999,
+ "50": 0.28489
}
}
}
\ No newline at end of file
diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_h100.json
index 586f94b9d87..f640ef3fd7c 100644
--- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_h100.json
+++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_h100.json
@@ -6,54 +6,54 @@
"values": {
"1": 10.92671,
"2": 10.91589,
- "3": 10.92552,
- "4": 10.93168,
- "5": 10.93015,
- "6": 10.9259,
- "7": 10.92646,
- "8": 10.92323,
- "9": 10.92778,
- "10": 10.9168,
- "11": 10.9178,
- "12": 10.92446,
- "13": 10.90961,
- "14": 10.90627,
- "15": 10.90112,
- "16": 10.88691,
- "17": 10.88827,
- "18": 10.88554,
- "19": 10.88654,
- "20": 10.8377,
- "21": 10.82717,
- "22": 10.81535,
- "23": 10.80831,
- "24": 10.78061,
- "25": 10.77774,
- "26": 10.76115,
- "27": 10.7495,
- "28": 10.6922,
- "29": 10.66686,
- "30": 10.63118,
- "31": 10.62182,
- "32": 10.61591,
- "33": 10.57843,
- "34": 10.54531,
- "35": 10.54625,
- "36": 10.53479,
- "37": 10.50533,
- "38": 10.50383,
- "39": 10.47322,
- "40": 10.45095,
- "41": 10.42606,
- "42": 10.41475,
- "43": 10.40064,
- "44": 10.37006,
- "45": 10.38168,
- "46": 10.33484,
- "47": 10.32444,
- "48": 10.28749,
- "49": 10.28608,
- "50": 10.27697
+ "3": 10.92569,
+ "4": 10.93204,
+ "5": 10.93027,
+ "6": 10.9261,
+ "7": 10.92637,
+ "8": 10.92388,
+ "9": 10.92728,
+ "10": 10.91588,
+ "11": 10.9183,
+ "12": 10.92402,
+ "13": 10.90967,
+ "14": 10.90628,
+ "15": 10.90098,
+ "16": 10.88556,
+ "17": 10.88818,
+ "18": 10.88475,
+ "19": 10.88523,
+ "20": 10.83769,
+ "21": 10.82735,
+ "22": 10.81478,
+ "23": 10.80877,
+ "24": 10.78047,
+ "25": 10.77776,
+ "26": 10.76048,
+ "27": 10.74912,
+ "28": 10.69225,
+ "29": 10.66725,
+ "30": 10.63087,
+ "31": 10.62053,
+ "32": 10.61533,
+ "33": 10.57791,
+ "34": 10.5462,
+ "35": 10.546,
+ "36": 10.53423,
+ "37": 10.50445,
+ "38": 10.50363,
+ "39": 10.47181,
+ "40": 10.44914,
+ "41": 10.42508,
+ "42": 10.41306,
+ "43": 10.39862,
+ "44": 10.36841,
+ "45": 10.37966,
+ "46": 10.33291,
+ "47": 10.32241,
+ "48": 10.28472,
+ "49": 10.28374,
+ "50": 10.27437
}
},
"num-zeros": {
@@ -61,56 +61,56 @@
"end_step": 50,
"step_interval": 1,
"values": {
- "1": 19058.0,
- "2": 19206.0,
- "3": 18972.0,
- "4": 19416.0,
- "5": 19009.0,
- "6": 18538.0,
- "7": 18981.0,
- "8": 18448.0,
- "9": 18864.0,
- "10": 19655.0,
- "11": 19064.0,
- "12": 18696.0,
- "13": 19292.0,
- "14": 19140.0,
- "15": 18806.0,
- "16": 18590.0,
- "17": 18993.0,
- "18": 19173.0,
- "19": 19321.0,
- "20": 19057.0,
- "21": 19086.0,
- "22": 18997.0,
- "23": 18891.0,
- "24": 19267.0,
- "25": 18711.0,
- "26": 19139.0,
- "27": 19114.0,
- "28": 18818.0,
- "29": 18371.0,
- "30": 18304.0,
- "31": 19016.0,
- "32": 19184.0,
- "33": 18481.0,
- "34": 18592.0,
- "35": 18848.0,
- "36": 18346.0,
- "37": 18564.0,
- "38": 18516.0,
- "39": 18959.0,
- "40": 19194.0,
- "41": 18945.0,
- "42": 18455.0,
- "43": 19053.0,
- "44": 18809.0,
- "45": 20372.0,
- "46": 19563.0,
- "47": 19974.0,
- "48": 20047.0,
- "49": 21674.0,
- "50": 20259.0
+ "1": 36710.0,
+ "2": 37160.0,
+ "3": 37309.0,
+ "4": 36783.0,
+ "5": 36803.0,
+ "6": 36142.0,
+ "7": 36370.0,
+ "8": 36176.0,
+ "9": 37100.0,
+ "10": 37917.0,
+ "11": 36745.0,
+ "12": 35709.0,
+ "13": 37084.0,
+ "14": 37775.0,
+ "15": 36119.0,
+ "16": 36038.0,
+ "17": 36700.0,
+ "18": 37055.0,
+ "19": 36638.0,
+ "20": 36735.0,
+ "21": 36231.0,
+ "22": 36562.0,
+ "23": 37065.0,
+ "24": 37204.0,
+ "25": 35891.0,
+ "26": 36983.0,
+ "27": 36795.0,
+ "28": 36205.0,
+ "29": 36207.0,
+ "30": 35575.0,
+ "31": 36467.0,
+ "32": 37374.0,
+ "33": 35909.0,
+ "34": 35933.0,
+ "35": 36426.0,
+ "36": 36062.0,
+ "37": 36347.0,
+ "38": 35710.0,
+ "39": 36968.0,
+ "40": 37362.0,
+ "41": 36161.0,
+ "42": 36032.0,
+ "43": 37353.0,
+ "44": 37025.0,
+ "45": 39544.0,
+ "46": 38629.0,
+ "47": 38810.0,
+ "48": 38612.0,
+ "49": 42460.0,
+ "50": 39729.0
}
},
"mem-allocated-bytes": {
@@ -120,54 +120,54 @@
"values": {
"1": 1027089408.0,
"2": 1027091968.0,
- "3": 1027088384.0,
- "4": 1027088384.0,
+ "3": 1027087360.0,
+ "4": 1027088896.0,
"5": 1027090944.0,
- "6": 1027091968.0,
- "7": 1027088896.0,
- "8": 1027092992.0,
- "9": 1027090944.0,
- "10": 1027090432.0,
- "11": 1027090944.0,
- "12": 1027091456.0,
+ "6": 1027090944.0,
+ "7": 1027088384.0,
+ "8": 1027092480.0,
+ "9": 1027091968.0,
+ "10": 1027091456.0,
+ "11": 1027090432.0,
+ "12": 1027090944.0,
"13": 1027091968.0,
- "14": 1027090944.0,
- "15": 1027091456.0,
- "16": 1027088384.0,
- "17": 1027088896.0,
+ "14": 1027094528.0,
+ "15": 1027090432.0,
+ "16": 1027088896.0,
+ "17": 1027089408.0,
"18": 1027090944.0,
- "19": 1027089920.0,
- "20": 1027089920.0,
+ "19": 1027088896.0,
+ "20": 1027090944.0,
"21": 1027092480.0,
- "22": 1027088896.0,
- "23": 1027094528.0,
+ "22": 1027090944.0,
+ "23": 1027093504.0,
"24": 1027091968.0,
"25": 1027091456.0,
- "26": 1027089408.0,
+ "26": 1027090944.0,
"27": 1027087872.0,
- "28": 1027091456.0,
+ "28": 1027092992.0,
"29": 1027090432.0,
- "30": 1027089920.0,
+ "30": 1027090432.0,
"31": 1027089408.0,
"32": 1027094528.0,
"33": 1027094016.0,
- "34": 1027092480.0,
- "35": 1027086848.0,
- "36": 1027088384.0,
+ "34": 1027093504.0,
+ "35": 1027085824.0,
+ "36": 1027087872.0,
"37": 1027088896.0,
- "38": 1027090432.0,
- "39": 1027090432.0,
+ "38": 1027089920.0,
+ "39": 1027089920.0,
"40": 1027091456.0,
- "41": 1027088896.0,
+ "41": 1027089920.0,
"42": 1027088384.0,
"43": 1027088896.0,
- "44": 1027091968.0,
- "45": 1027091456.0,
- "46": 1027085824.0,
+ "44": 1027090944.0,
+ "45": 1027091968.0,
+ "46": 1027084800.0,
"47": 1027089920.0,
"48": 1027088384.0,
- "49": 1027086848.0,
- "50": 1027089920.0
+ "49": 1027086336.0,
+ "50": 1027089408.0
}
},
"mem-max-allocated-bytes": {
@@ -182,49 +182,49 @@
"5": 3298735616.0,
"6": 3298735616.0,
"7": 3298735616.0,
- "8": 3299022336.0,
- "9": 3299022336.0,
- "10": 3299022336.0,
- "11": 3299138048.0,
- "12": 3299138048.0,
- "13": 3299225088.0,
- "14": 3299394048.0,
- "15": 3299394048.0,
- "16": 3299394048.0,
- "17": 3299394048.0,
- "18": 3299394048.0,
- "19": 3299809792.0,
- "20": 3299809792.0,
- "21": 3299809792.0,
- "22": 3299809792.0,
- "23": 3300119552.0,
- "24": 3300119552.0,
- "25": 3300119552.0,
- "26": 3300119552.0,
- "27": 3300119552.0,
- "28": 3300119552.0,
- "29": 3300119552.0,
- "30": 3300119552.0,
- "31": 3300119552.0,
- "32": 3300119552.0,
- "33": 3300119552.0,
- "34": 3300416512.0,
- "35": 3300416512.0,
- "36": 3300416512.0,
- "37": 3300416512.0,
- "38": 3300416512.0,
- "39": 3300416512.0,
- "40": 3300416512.0,
- "41": 3300416512.0,
- "42": 3300416512.0,
- "43": 3300416512.0,
- "44": 3300416512.0,
- "45": 3300416512.0,
- "46": 3300416512.0,
- "47": 3300416512.0,
- "48": 3300416512.0,
- "49": 3300416512.0,
- "50": 3300416512.0
+ "8": 3299167232.0,
+ "9": 3299167232.0,
+ "10": 3299167232.0,
+ "11": 3299167232.0,
+ "12": 3299167232.0,
+ "13": 3299167232.0,
+ "14": 3300327424.0,
+ "15": 3300327424.0,
+ "16": 3300327424.0,
+ "17": 3300327424.0,
+ "18": 3300327424.0,
+ "19": 3300327424.0,
+ "20": 3300327424.0,
+ "21": 3300327424.0,
+ "22": 3300327424.0,
+ "23": 3300327424.0,
+ "24": 3300327424.0,
+ "25": 3300327424.0,
+ "26": 3300327424.0,
+ "27": 3300327424.0,
+ "28": 3300327424.0,
+ "29": 3300327424.0,
+ "30": 3300327424.0,
+ "31": 3300327424.0,
+ "32": 3301105664.0,
+ "33": 3301105664.0,
+ "34": 3301105664.0,
+ "35": 3301105664.0,
+ "36": 3301105664.0,
+ "37": 3301105664.0,
+ "38": 3301105664.0,
+ "39": 3301105664.0,
+ "40": 3301105664.0,
+ "41": 3301105664.0,
+ "42": 3301105664.0,
+ "43": 3301105664.0,
+ "44": 3301105664.0,
+ "45": 3301105664.0,
+ "46": 3301105664.0,
+ "47": 3301105664.0,
+ "48": 3301105664.0,
+ "49": 3301105664.0,
+ "50": 3301105664.0
}
},
"iteration-time": {
@@ -233,55 +233,55 @@
"step_interval": 1,
"values": {
"1": "nan",
- "2": 7.23651,
- "3": 0.24222,
- "4": 0.22131,
- "5": 0.2271,
- "6": 0.22305,
- "7": 0.21362,
- "8": 0.21345,
- "9": 0.21177,
- "10": 0.20554,
- "11": 0.21683,
- "12": 0.21959,
- "13": 0.23214,
- "14": 0.21046,
- "15": 0.2093,
- "16": 0.20781,
- "17": 0.21094,
- "18": 0.20855,
- "19": 0.20679,
- "20": 0.20604,
- "21": 0.21437,
- "22": 0.20598,
- "23": 0.20879,
- "24": 0.20414,
- "25": 0.20266,
- "26": 0.20454,
- "27": 0.20634,
- "28": 0.20309,
- "29": 0.20238,
- "30": 0.20203,
- "31": 0.20437,
- "32": 0.20127,
- "33": 0.20216,
- "34": 0.20283,
- "35": 0.20336,
- "36": 0.20293,
- "37": 0.20654,
- "38": 0.20237,
- "39": 0.20306,
- "40": 0.20384,
- "41": 0.20686,
- "42": 0.20485,
- "43": 0.20433,
- "44": 0.20288,
- "45": 0.20816,
- "46": 0.20343,
- "47": 0.2071,
- "48": 0.20408,
- "49": 0.2097,
- "50": 0.20466
+ "2": 7.58728,
+ "3": 0.31009,
+ "4": 0.30574,
+ "5": 0.30849,
+ "6": 0.30568,
+ "7": 0.304,
+ "8": 0.28938,
+ "9": 0.29273,
+ "10": 0.28679,
+ "11": 0.29223,
+ "12": 0.29094,
+ "13": 0.28669,
+ "14": 0.28785,
+ "15": 0.28633,
+ "16": 0.2907,
+ "17": 0.28571,
+ "18": 0.28649,
+ "19": 0.28924,
+ "20": 0.28411,
+ "21": 0.29161,
+ "22": 0.2845,
+ "23": 0.29217,
+ "24": 0.28492,
+ "25": 0.28577,
+ "26": 0.28786,
+ "27": 0.2893,
+ "28": 0.29073,
+ "29": 0.28506,
+ "30": 0.28519,
+ "31": 0.28397,
+ "32": 0.2904,
+ "33": 0.29082,
+ "34": 0.28599,
+ "35": 0.28963,
+ "36": 0.28976,
+ "37": 0.28557,
+ "38": 0.29164,
+ "39": 0.29238,
+ "40": 0.28427,
+ "41": 0.28783,
+ "42": 0.28875,
+ "43": 0.28478,
+ "44": 0.28439,
+ "45": 0.29078,
+ "46": 0.28385,
+ "47": 0.28272,
+ "48": 0.28312,
+ "49": 0.29468,
+ "50": 0.28837
}
}
}
\ No newline at end of file
diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_h100.json
index 0e601bc661a..4a25865ef60 100644
--- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_h100.json
+++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_h100.json
@@ -6,54 +6,54 @@
"values": {
"1": 10.90768,
"2": 10.90727,
- "3": 10.9168,
- "4": 10.90829,
- "5": 10.91479,
- "6": 10.89485,
- "7": 10.90737,
- "8": 10.90882,
- "9": 10.90915,
- "10": 10.91068,
- "11": 10.90062,
- "12": 10.89878,
- "13": 10.88648,
- "14": 10.88227,
- "15": 10.87325,
- "16": 10.85233,
- "17": 10.85658,
- "18": 10.84766,
- "19": 10.85521,
- "20": 10.77651,
- "21": 10.76089,
- "22": 10.75994,
- "23": 10.7431,
- "24": 10.70783,
- "25": 10.70972,
- "26": 10.69229,
- "27": 10.66881,
- "28": 10.60562,
- "29": 10.57195,
- "30": 10.54188,
- "31": 10.53201,
- "32": 10.51676,
- "33": 10.48119,
- "34": 10.44953,
- "35": 10.44578,
- "36": 10.42078,
- "37": 10.40052,
- "38": 10.4042,
- "39": 10.36985,
- "40": 10.35254,
- "41": 10.33024,
- "42": 10.31102,
- "43": 10.29815,
- "44": 10.27127,
- "45": 10.28382,
- "46": 10.24095,
- "47": 10.23461,
- "48": 10.19191,
- "49": 10.19522,
- "50": 10.19066
+ "3": 10.91677,
+ "4": 10.90838,
+ "5": 10.91536,
+ "6": 10.89522,
+ "7": 10.90774,
+ "8": 10.90815,
+ "9": 10.90916,
+ "10": 10.91026,
+ "11": 10.9013,
+ "12": 10.89896,
+ "13": 10.88718,
+ "14": 10.88255,
+ "15": 10.87321,
+ "16": 10.85207,
+ "17": 10.85744,
+ "18": 10.84755,
+ "19": 10.85504,
+ "20": 10.77576,
+ "21": 10.76177,
+ "22": 10.75967,
+ "23": 10.74285,
+ "24": 10.70808,
+ "25": 10.70994,
+ "26": 10.6922,
+ "27": 10.66835,
+ "28": 10.60509,
+ "29": 10.57149,
+ "30": 10.54122,
+ "31": 10.53124,
+ "32": 10.51525,
+ "33": 10.48026,
+ "34": 10.44887,
+ "35": 10.44515,
+ "36": 10.41928,
+ "37": 10.39906,
+ "38": 10.40257,
+ "39": 10.3684,
+ "40": 10.35058,
+ "41": 10.32832,
+ "42": 10.30907,
+ "43": 10.29586,
+ "44": 10.26915,
+ "45": 10.28138,
+ "46": 10.23855,
+ "47": 10.23169,
+ "48": 10.18904,
+ "49": 10.19272,
+ "50": 10.18773
}
},
"num-zeros": {
@@ -61,56 +61,56 @@
"end_step": 50,
"step_interval": 1,
"values": {
- "1": 16760.0,
- "2": 16448.0,
- "3": 16457.0,
- "4": 16370.0,
- "5": 16128.0,
- "6": 15986.0,
- "7": 16871.0,
- "8": 16013.0,
- "9": 16494.0,
- "10": 16663.0,
- "11": 16350.0,
- "12": 15746.0,
- "13": 16650.0,
- "14": 16501.0,
- "15": 16034.0,
- "16": 16079.0,
- "17": 16562.0,
- "18": 16415.0,
- "19": 16924.0,
- "20": 16413.0,
- "21": 16333.0,
- "22": 16350.0,
- "23": 16127.0,
- "24": 16475.0,
- "25": 15776.0,
- "26": 16841.0,
- "27": 16638.0,
- "28": 16195.0,
- "29": 16566.0,
- "30": 16223.0,
- "31": 16976.0,
- "32": 17083.0,
- "33": 17181.0,
- "34": 17125.0,
- "35": 18157.0,
- "36": 17429.0,
- "37": 17735.0,
- "38": 17898.0,
- "39": 18505.0,
- "40": 19060.0,
- "41": 18163.0,
- "42": 18145.0,
- "43": 18663.0,
- "44": 18601.0,
- "45": 20633.0,
- "46": 20049.0,
- "47": 19866.0,
- "48": 20108.0,
- "49": 21960.0,
- "50": 20138.0
+ "1": 32335.0,
+ "2": 31841.0,
+ "3": 31775.0,
+ "4": 32249.0,
+ "5": 31694.0,
+ "6": 31106.0,
+ "7": 32512.0,
+ "8": 30765.0,
+ "9": 32296.0,
+ "10": 32743.0,
+ "11": 31984.0,
+ "12": 31014.0,
+ "13": 32552.0,
+ "14": 32744.0,
+ "15": 31135.0,
+ "16": 30964.0,
+ "17": 32132.0,
+ "18": 31974.0,
+ "19": 32648.0,
+ "20": 32452.0,
+ "21": 31857.0,
+ "22": 31848.0,
+ "23": 32281.0,
+ "24": 32970.0,
+ "25": 31299.0,
+ "26": 32672.0,
+ "27": 32851.0,
+ "28": 32568.0,
+ "29": 32585.0,
+ "30": 32739.0,
+ "31": 33858.0,
+ "32": 34616.0,
+ "33": 34212.0,
+ "34": 33982.0,
+ "35": 35475.0,
+ "36": 34732.0,
+ "37": 35645.0,
+ "38": 34867.0,
+ "39": 36578.0,
+ "40": 37563.0,
+ "41": 36481.0,
+ "42": 35997.0,
+ "43": 37571.0,
+ "44": 37041.0,
+ "45": 41119.0,
+ "46": 39345.0,
+ "47": 38921.0,
+ "48": 39898.0,
+ "49": 43851.0,
+ "50": 39868.0
}
},
"mem-allocated-bytes": {
@@ -118,56 +118,56 @@
"end_step": 50,
"step_interval": 1,
"values": {
- "1": 1558401536.0,
- "2": 1558716416.0,
- "3": 1559734784.0,
- "4": 1558522880.0,
- "5": 1558433280.0,
- "6": 1558899712.0,
- "7": 1559085568.0,
- "8": 1559302144.0,
- "9": 1558675968.0,
- "10": 1559221248.0,
- "11": 1558368768.0,
- "12": 1558981632.0,
- "13": 1559165440.0,
- "14": 1558203392.0,
- "15": 1558620672.0,
- "16": 1558203392.0,
- "17": 1558254080.0,
- "18": 1559229440.0,
- "19": 1558510080.0,
- "20": 1558384640.0,
- "21": 1559245312.0,
- "22": 1558439936.0,
- "23": 1558500864.0,
- "24": 1558304768.0,
- "25": 1558666752.0,
- "26": 1558304768.0,
- "27": 1558846976.0,
- "28": 1558304768.0,
- "29": 1558355456.0,
- "30": 1559102464.0,
- "31": 1559019008.0,
- "32": 1559699456.0,
- "33": 1559100928.0,
- "34": 1558921216.0,
- "35": 1558616576.0,
- "36": 1558406144.0,
- "37": 1559692800.0,
- "38": 1558406144.0,
- "39": 1558585856.0,
- "40": 1559957504.0,
- "41": 1559263744.0,
- "42": 1558507520.0,
- "43": 1558776320.0,
- "44": 1559255040.0,
- "45": 1558456832.0,
- "46": 1558831104.0,
- "47": 1558648320.0,
- "48": 1558507520.0,
- "49": 1559791104.0,
- "50": 1558966272.0
+ "1": 1558936064.0,
+ "2": 1559142400.0,
+ "3": 1558418944.0,
+ "4": 1558473728.0,
+ "5": 1558619136.0,
+ "6": 1558418944.0,
+ "7": 1558368256.0,
+ "8": 1558418944.0,
+ "9": 1559001088.0,
+ "10": 1558676992.0,
+ "11": 1559100416.0,
+ "12": 1559344640.0,
+ "13": 1558738944.0,
+ "14": 1558446592.0,
+ "15": 1558907904.0,
+ "16": 1559096832.0,
+ "17": 1558469632.0,
+ "18": 1558520320.0,
+ "19": 1559102464.0,
+ "20": 1559440896.0,
+ "21": 1558653952.0,
+ "22": 1558520320.0,
+ "23": 1558571008.0,
+ "24": 1559296512.0,
+ "25": 1559556096.0,
+ "26": 1558520320.0,
+ "27": 1558571008.0,
+ "28": 1558621696.0,
+ "29": 1558571008.0,
+ "30": 1560001024.0,
+ "31": 1558796288.0,
+ "32": 1558621696.0,
+ "33": 1558571008.0,
+ "34": 1559272960.0,
+ "35": 1559274496.0,
+ "36": 1558780416.0,
+ "37": 1558672384.0,
+ "38": 1558621696.0,
+ "39": 1559065600.0,
+ "40": 1559026176.0,
+ "41": 1559201792.0,
+ "42": 1558960640.0,
+ "43": 1558672384.0,
+ "44": 1559427584.0,
+ "45": 1558672384.0,
+ "46": 1558723072.0,
+ "47": 1558773760.0,
+ "48": 1558723072.0,
+ "49": 1559203840.0,
+ "50": 1558723072.0
}
},
"mem-max-allocated-bytes": {
@@ -175,56 +175,56 @@
"end_step": 50,
"step_interval": 1,
"values": {
- "1": 3480202240.0,
- "2": 4039383552.0,
- "3": 4045734912.0,
- "4": 4052123136.0,
- "5": 4052123136.0,
- "6": 4053458432.0,
- "7": 4054095872.0,
- "8": 4058186240.0,
- "9": 4059530240.0,
- "10": 4061010432.0,
- "11": 4061010432.0,
- "12": 4061010432.0,
- "13": 4061010432.0,
- "14": 4061010432.0,
- "15": 4061010432.0,
- "16": 4061010432.0,
- "17": 4061010432.0,
- "18": 4061010432.0,
- "19": 4061010432.0,
- "20": 4061010432.0,
- "21": 4061010432.0,
- "22": 4061010432.0,
- "23": 4061010432.0,
- "24": 4061010432.0,
- "25": 4061010432.0,
- "26": 4061010432.0,
- "27": 4061010432.0,
- "28": 4061010432.0,
- "29": 4061010432.0,
- "30": 4061010432.0,
- "31": 4061010432.0,
- "32": 4061010432.0,
- "33": 4061010432.0,
- "34": 4061010432.0,
- "35": 4061010432.0,
- "36": 4061010432.0,
- "37": 4061010432.0,
- "38": 4061010432.0,
- "39": 4061010432.0,
- "40": 4061010432.0,
- "41": 4061010432.0,
- "42": 4061010432.0,
- "43": 4061010432.0,
- "44": 4061010432.0,
- "45": 4061010432.0,
- "46": 4061010432.0,
- "47": 4061010432.0,
- "48": 4061010432.0,
- "49": 4061010432.0,
- "50": 4061010432.0
+ "1": 3480480768.0,
+ "2": 4041655808.0,
+ "3": 4048918528.0,
+ "4": 4048918528.0,
+ "5": 4048918528.0,
+ "6": 4053445632.0,
+ "7": 4053445632.0,
+ "8": 4053990400.0,
+ "9": 4056294912.0,
+ "10": 4058575872.0,
+ "11": 4058575872.0,
+ "12": 4058575872.0,
+ "13": 4058575872.0,
+ "14": 4058575872.0,
+ "15": 4058575872.0,
+ "16": 4058575872.0,
+ "17": 4058575872.0,
+ "18": 4058575872.0,
+ "19": 4058575872.0,
+ "20": 4058575872.0,
+ "21": 4058575872.0,
+ "22": 4058575872.0,
+ "23": 4058575872.0,
+ "24": 4058575872.0,
+ "25": 4058575872.0,
+ "26": 4058575872.0,
+ "27": 4058575872.0,
+ "28": 4058575872.0,
+ "29": 4058575872.0,
+ "30": 4058575872.0,
+ "31": 4058575872.0,
+ "32": 4058575872.0,
+ "33": 4058575872.0,
+ "34": 4058575872.0,
+ "35": 4058575872.0,
+ "36": 4058575872.0,
+ "37": 4058575872.0,
+ "38": 4058575872.0,
+ "39": 4058575872.0,
+ "40": 4058575872.0,
+ "41": 4058575872.0,
+ "42": 4058575872.0,
+ "43": 4058575872.0,
+ "44": 4058575872.0,
+ "45": 4058575872.0,
+ "46": 4058575872.0,
+ "47": 4058575872.0,
+ "48": 4058575872.0,
+ "49": 4058575872.0,
+ "50": 4058575872.0
}
},
"iteration-time": {
@@ -233,55 +233,55 @@
"step_interval": 1,
"values": {
"1": "nan",
- "2": 11.67698,
- "3": 0.39155,
- "4": 0.31689,
- "5": 0.294,
- "6": 0.31141,
- "7": 0.29128,
- "8": 0.28675,
- "9": 0.28172,
- "10": 0.27347,
- "11": 0.26642,
- "12": 0.27974,
- "13": 0.27332,
- "14": 0.36787,
- "15": 0.26967,
- "16": 0.26447,
- "17": 0.26033,
- "18": 0.2662,
- "19": 0.28876,
- "20": 0.27381,
- "21": 0.26827,
- "22": 0.28438,
- "23": 0.27253,
- "24": 0.27903,
- "25": 0.27474,
- "26": 0.28579,
- "27": 0.28072,
- "28": 0.2816,
- "29": 0.32547,
- "30": 0.27477,
- "31": 0.27095,
- "32": 0.27719,
- "33": 0.26688,
- "34": 0.27227,
- "35": 0.2837,
- "36": 0.27295,
- "37": 0.26868,
- "38": 0.26936,
- "39": 0.27392,
- "40": 0.2649,
- "41": 0.27268,
- "42": 0.26786,
- "43": 0.26041,
- "44": 0.2684,
- "45": 0.26786,
- "46": 0.26105,
- "47": 0.26729,
- "48": 0.26353,
- "49": 0.27083,
- "50": 0.26181
+ "2": 9.11114,
+ "3": 0.46745,
+ "4": 0.6067,
+ "5": 0.59171,
+ "6": 0.4369,
+ "7": 0.41515,
+ "8": 0.45255,
+ "9": 0.40714,
+ "10": 0.40441,
+ "11": 0.39176,
+ "12": 0.41251,
+ "13": 0.38216,
+ "14": 0.39025,
+ "15": 0.4058,
+ "16": 0.38848,
+ "17": 0.35936,
+ "18": 0.36512,
+ "19": 0.38989,
+ "20": 0.39482,
+ "21": 0.40064,
+ "22": 0.40323,
+ "23": 0.38135,
+ "24": 0.37421,
+ "25": 0.36701,
+ "26": 0.358,
+ "27": 0.35679,
+ "28": 0.37525,
+ "29": 0.37141,
+ "30": 0.36686,
+ "31": 0.37147,
+ "32": 0.3724,
+ "33": 0.37369,
+ "34": 0.36853,
+ "35": 0.37221,
+ "36": 0.36497,
+ "37": 0.36365,
+ "38": 0.36316,
+ "39": 0.36521,
+ "40": 0.3622,
+ "41": 0.36129,
+ "42": 0.37003,
+ "43": 0.36053,
+ "44": 0.36244,
+ "45": 0.36485,
+ "46": 0.35324,
+ "47": 0.36076,
+ "48": 0.3573,
+ "49": 0.36368,
+ "50": 0.35292
}
}
}
\ No newline at end of file
diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgx_h100.json
index ef8ee741272..a76d8667ec6 100644
--- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgx_h100.json
+++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgx_h100.json
@@ -2,109 +2,343 @@
"lm loss": {
"start_step": 1,
"end_step": 50,
- "step_interval": 5,
+ "step_interval": 1,
"values": {
- "1": 11.0475,
- "5": 9.43078,
- "10": 8.89238,
- "15": 7.93732,
- "20": 7.77942,
- "25": 7.61408,
- "30": 7.57234,
- "35": 7.15189,
- "40": 7.48085,
- "45": 7.12056,
- "50": 6.96054
+ "1": 11.0474,
+ "2": 11.03765,
+ "3": 9.6074,
+ "4": 9.2648,
+ "5": 9.42291,
+ "6": 9.09511,
+ "7": 9.12753,
+ "8": 8.75686,
+ "9": 8.61627,
+ "10": 8.89295,
+ "11": 8.37933,
+ "12": 8.39932,
+ "13": 8.32626,
+ "14": 7.81437,
+ "15": 7.93661,
+ "16": 7.99492,
+ "17": 7.95458,
+ "18": 7.67733,
+ "19": 8.07234,
+ "20": 7.78815,
+ "21": 7.48342,
+ "22": 7.48177,
+ "23": 7.34879,
+ "24": 7.34465,
+ "25": 7.61117,
+ "26": 7.01605,
+ "27": 7.54878,
+ "28": 7.26655,
+ "29": 7.43507,
+ "30": 7.56529,
+ "31": 7.32669,
+ "32": 7.50645,
+ "33": 7.5577,
+ "34": 7.60977,
+ "35": 7.14607,
+ "36": 7.00597,
+ "37": 7.34071,
+ "38": 7.11796,
+ "39": 7.46649,
+ "40": 7.47443,
+ "41": 7.41032,
+ "42": 7.17365,
+ "43": 7.16495,
+ "44": 7.34265,
+ "45": 7.10918,
+ "46": 6.83934,
+ "47": 7.22335,
+ "48": 7.05732,
+ "49": 7.53394,
+ "50": 6.95951
}
},
"num-zeros": {
"start_step": 1,
"end_step": 50,
- "step_interval": 5,
+ "step_interval": 1,
"values": {
- "1": 38802620.0,
- "5": 243556240.0,
- "10": 716187584.0,
- "15": 614358336.0,
- "20": 677963584.0,
- "25": 736321856.0,
- "30": 505223648.0,
- "35": 548946176.0,
- "40": 412329664.0,
- "45": 376634624.0,
- "50": 205546672.0
+ "1": 38802536.0,
+ "2": 38543540.0,
+ "3": 38739408.0,
+ "4": 273756736.0,
+ "5": 205853584.0,
+ "6": 284244640.0,
+ "7": 652227968.0,
+ "8": 790994816.0,
+ "9": 762295424.0,
+ "10": 665870592.0,
+ "11": 618336384.0,
+ "12": 639816192.0,
+ "13": 699169600.0,
+ "14": 620502464.0,
+ "15": 623699456.0,
+ "16": 847396864.0,
+ "17": 601834432.0,
+ "18": 642855744.0,
+ "19": 668078912.0,
+ "20": 574651008.0,
+ "21": 608590080.0,
+ "22": 599821504.0,
+ "23": 558380672.0,
+ "24": 688014720.0,
+ "25": 500623296.0,
+ "26": 532887808.0,
+ "27": 506526976.0,
+ "28": 450900800.0,
+ "29": 528748480.0,
+ "30": 445603872.0,
+ "31": 457250368.0,
+ "32": 400653888.0,
+ "33": 347460640.0,
+ "34": 268919904.0,
+ "35": 495515584.0,
+ "36": 332139008.0,
+ "37": 446760768.0,
+ "38": 391328576.0,
+ "39": 378290400.0,
+ "40": 261331328.0,
+ "41": 368680832.0,
+ "42": 337485280.0,
+ "43": 337755968.0,
+ "44": 324657920.0,
+ "45": 216104608.0,
+ "46": 218159872.0,
+ "47": 302569184.0,
+ "48": 296505312.0,
+ "49": 280170176.0,
+ "50": 268486912.0
}
},
"mem-allocated-bytes": {
"start_step": 1,
"end_step": 50,
- "step_interval": 5,
+ "step_interval": 1,
"values": {
- "1": 7321331200.0,
- "5": 7321333248.0,
- "10": 7321333248.0,
- "15": 7321333248.0,
- "20": 7321333248.0,
- "25": 7321333248.0,
- "30": 7321333248.0,
- "35": 7321333248.0,
- "40": 7321333248.0,
- "45": 7321333248.0,
- "50": 7321333248.0
+ "1": 7316093440.0,
+ "2": 7316095488.0,
+ "3": 7316095488.0,
+ "4": 7316095488.0,
+ "5": 7316095488.0,
+ "6": 7316095488.0,
+ "7": 7316095488.0,
+ "8": 7316095488.0,
+ "9": 7316095488.0,
+ "10": 7316095488.0,
+ "11": 7316095488.0,
+ "12": 7316095488.0,
+ "13": 7316095488.0,
+ "14": 7316095488.0,
+ "15": 7316095488.0,
+ "16": 7316095488.0,
+ "17": 7316095488.0,
+ "18": 7316095488.0,
+ "19": 7316095488.0,
+ "20": 7316095488.0,
+ "21": 7316095488.0,
+ "22": 7316095488.0,
+ "23": 7316095488.0,
+ "24": 7316095488.0,
+ "25": 7316095488.0,
+ "26": 7316095488.0,
+ "27": 7316095488.0,
+ "28": 7316095488.0,
+ "29": 7316095488.0,
+ "30": 7316095488.0,
+ "31": 7316095488.0,
+ "32": 7316095488.0,
+ "33": 7316095488.0,
+ "34": 7316095488.0,
+ "35": 7316095488.0,
+ "36": 7316095488.0,
+ "37": 7316095488.0,
+ "38": 7316095488.0,
+ "39": 7316095488.0,
+ "40": 7316095488.0,
+ "41": 7316095488.0,
+ "42": 7316095488.0,
+ "43": 7316095488.0,
+ "44": 7316095488.0,
+ "45": 7316095488.0,
+ "46": 7316095488.0,
+ "47": 7316095488.0,
+ "48": 7316095488.0,
+ "49": 7316095488.0,
+ "50": 7316095488.0
}
},
"mem-max-allocated-bytes": {
"start_step": 1,
"end_step": 50,
- "step_interval": 5,
+ "step_interval": 1,
"values": {
- "1": 53176152064.0,
- "5": 55926337536.0,
- "10": 55926337536.0,
- "15": 55926337536.0,
- "20": 55926337536.0,
- "25": 56534257664.0,
- "30": 57393635328.0,
- "35": 57393635328.0,
- "40": 57578217472.0,
- "45": 57578217472.0,
- "50": 57578217472.0
+ "1": 53549867008.0,
+ "2": 56295710720.0,
+ "3": 56295710720.0,
+ "4": 56295710720.0,
+ "5": 56295710720.0,
+ "6": 56295710720.0,
+ "7": 56295710720.0,
+ "8": 56295710720.0,
+ "9": 56295710720.0,
+ "10": 56295710720.0,
+ "11": 56295710720.0,
+ "12": 56295710720.0,
+ "13": 56295710720.0,
+ "14": 56295710720.0,
+ "15": 56295710720.0,
+ "16": 56295710720.0,
+ "17": 56295710720.0,
+ "18": 56295710720.0,
+ "19": 56295710720.0,
+ "20": 56295710720.0,
+ "21": 56295710720.0,
+ "22": 56295710720.0,
+ "23": 56295710720.0,
+ "24": 56738553856.0,
+ "25": 56738553856.0,
+ "26": 56777162752.0,
+ "27": 56777162752.0,
+ "28": 56777162752.0,
+ "29": 56777162752.0,
+ "30": 56777162752.0,
+ "31": 56777162752.0,
+ "32": 56777162752.0,
+ "33": 56777162752.0,
+ "34": 56824344576.0,
+ "35": 57080135680.0,
+ "36": 57331695616.0,
+ "37": 57331695616.0,
+ "38": 57577013248.0,
+ "39": 57577013248.0,
+ "40": 57577013248.0,
+ "41": 57577013248.0,
+ "42": 57577013248.0,
+ "43": 57587191808.0,
+ "44": 57596944384.0,
+ "45": 57705652224.0,
+ "46": 57790390272.0,
+ "47": 57790390272.0,
+ "48": 57790390272.0,
+ "49": 57790390272.0,
+ "50": 57790390272.0
}
},
"mtp_1 loss": {
"start_step": 1,
"end_step": 50,
- "step_interval": 5,
+ "step_interval": 1,
"values": {
- "1": 11.0776,
- "5": 9.87653,
- "10": 9.02332,
- "15": 7.91471,
- "20": 7.75886,
- "25": 7.56825,
- "30": 7.53841,
- "35": 7.12192,
- "40": 7.44579,
- "45": 7.09307,
- "50": 6.94739
+ "1": 11.07756,
+ "2": 11.07651,
+ "3": 10.53063,
+ "4": 10.08611,
+ "5": 9.87524,
+ "6": 9.55366,
+ "7": 9.62345,
+ "8": 8.91012,
+ "9": 8.72228,
+ "10": 9.02504,
+ "11": 8.39501,
+ "12": 8.42504,
+ "13": 8.32334,
+ "14": 7.76976,
+ "15": 7.91789,
+ "16": 7.97018,
+ "17": 7.92051,
+ "18": 7.65266,
+ "19": 8.0377,
+ "20": 7.76074,
+ "21": 7.44752,
+ "22": 7.43657,
+ "23": 7.30984,
+ "24": 7.31186,
+ "25": 7.56562,
+ "26": 6.97201,
+ "27": 7.50933,
+ "28": 7.2266,
+ "29": 7.40633,
+ "30": 7.53569,
+ "31": 7.28904,
+ "32": 7.47424,
+ "33": 7.53526,
+ "34": 7.59404,
+ "35": 7.11968,
+ "36": 6.9867,
+ "37": 7.32338,
+ "38": 7.09605,
+ "39": 7.45524,
+ "40": 7.44706,
+ "41": 7.39271,
+ "42": 7.14573,
+ "43": 7.13128,
+ "44": 7.31399,
+ "45": 7.08836,
+ "46": 6.80158,
+ "47": 7.2062,
+ "48": 7.0468,
+ "49": 7.47982,
+ "50": 6.94494
}
},
"iteration-time": {
"start_step": 1,
"end_step": 50,
- "step_interval": 5,
+ "step_interval": 1,
"values": {
- "1": 51.33936,
- "5": 1.24167,
- "10": 1.14623,
- "15": 1.16973,
- "20": 1.23165,
- "25": 1.13719,
- "30": 1.15864,
- "35": 1.13509,
- "40": 1.14729,
- "45": 1.14136,
- "50": 1.13625
+ "1": 102.52307,
+ "2": 1.75305,
+ "3": 1.36681,
+ "4": 1.62808,
+ "5": 1.13714,
+ "6": 1.45805,
+ "7": 1.6121,
+ "8": 1.20031,
+ "9": 1.09784,
+ "10": 1.10383,
+ "11": 1.10878,
+ "12": 1.18093,
+ "13": 1.43808,
+ "14": 1.17223,
+ "15": 1.11575,
+ "16": 1.1159,
+ "17": 1.11727,
+ "18": 1.10751,
+ "19": 1.11189,
+ "20": 1.1082,
+ "21": 1.10459,
+ "22": 1.11252,
+ "23": 1.10744,
+ "24": 1.12218,
+ "25": 1.09823,
+ "26": 1.11657,
+ "27": 1.08949,
+ "28": 1.10254,
+ "29": 1.10189,
+ "30": 1.08963,
+ "31": 1.10454,
+ "32": 1.09654,
+ "33": 1.08747,
+ "34": 1.09674,
+ "35": 1.09106,
+ "36": 1.08904,
+ "37": 1.1178,
+ "38": 1.09379,
+ "39": 1.10306,
+ "40": 1.09998,
+ "41": 1.08808,
+ "42": 1.0941,
+ "43": 1.0919,
+ "44": 1.0813,
+ "45": 1.08715,
+ "46": 1.07061,
+ "47": 1.07098,
+ "48": 1.07438,
+ "49": 1.07469,
+ "50": 1.0719
}
}
}
\ No newline at end of file
diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json
index 6b0e31e5923..cd14bb0e352 100644
--- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json
+++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json
@@ -341,4 +341,4 @@
"50": 1.8958
}
}
-}
\ No newline at end of file
+}
diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json
index c9b7e4b8b1d..04852a3a180 100644
--- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json
+++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json
@@ -284,4 +284,4 @@
"50": 1.93108
}
}
-}
\ No newline at end of file
+}
diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json
index f529a646a7e..9533c3e29a1 100644
--- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json
+++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json
@@ -8,102 +8,102 @@
"2": 10.91072,
"3": 10.91895,
"4": 10.91763,
- "5": 10.90484,
- "6": 10.90203,
- "7": 10.89753,
- "8": 10.91294,
- "9": 10.91701,
- "10": 10.91028,
- "11": 10.90124,
- "12": 10.89698,
- "13": 10.88788,
- "14": 10.89478,
- "15": 10.87488,
- "16": 10.87022,
- "17": 10.86892,
- "18": 10.85196,
- "19": 10.87008,
- "20": 10.7881,
- "21": 10.77222,
- "22": 10.7669,
- "23": 10.75865,
- "24": 10.71955,
- "25": 10.71987,
- "26": 10.71249,
- "27": 10.68554,
- "28": 10.61292,
- "29": 10.58664,
- "30": 10.56554,
- "31": 10.55749,
- "32": 10.54875,
- "33": 10.50948,
- "34": 10.48165,
- "35": 10.46995,
- "36": 10.45309,
- "37": 10.42791,
- "38": 10.43268,
- "39": 10.40324,
- "40": 10.3773,
- "41": 10.36856,
- "42": 10.33125,
- "43": 10.31537,
- "44": 10.29014,
- "45": 10.30253,
- "46": 10.26536,
- "47": 10.25557,
- "48": 10.20689,
- "49": 10.21031,
- "50": 10.2105,
- "51": 10.21191,
- "52": 10.16277,
- "53": 10.16315,
- "54": 10.13391,
- "55": 10.10867,
- "56": 10.13455,
+ "5": 10.90462,
+ "6": 10.90222,
+ "7": 10.89756,
+ "8": 10.91282,
+ "9": 10.91678,
+ "10": 10.9104,
+ "11": 10.9015,
+ "12": 10.89781,
+ "13": 10.8883,
+ "14": 10.89516,
+ "15": 10.87477,
+ "16": 10.87004,
+ "17": 10.86866,
+ "18": 10.85186,
+ "19": 10.87023,
+ "20": 10.78833,
+ "21": 10.7724,
+ "22": 10.76686,
+ "23": 10.75821,
+ "24": 10.71892,
+ "25": 10.72027,
+ "26": 10.71214,
+ "27": 10.68529,
+ "28": 10.61314,
+ "29": 10.58641,
+ "30": 10.56586,
+ "31": 10.5575,
+ "32": 10.5488,
+ "33": 10.50937,
+ "34": 10.48155,
+ "35": 10.47006,
+ "36": 10.45297,
+ "37": 10.42758,
+ "38": 10.43258,
+ "39": 10.40282,
+ "40": 10.37727,
+ "41": 10.36865,
+ "42": 10.33123,
+ "43": 10.31512,
+ "44": 10.29023,
+ "45": 10.30268,
+ "46": 10.26547,
+ "47": 10.25564,
+ "48": 10.20686,
+ "49": 10.21056,
+ "50": 10.21037,
+ "51": 10.21194,
+ "52": 10.16248,
+ "53": 10.16319,
+ "54": 10.13395,
+ "55": 10.10854,
+ "56": 10.13474,
"57": 10.13262,
- "58": 10.12407,
- "59": 10.06503,
- "60": 10.09528,
- "61": 10.04743,
- "62": 10.01537,
- "63": 10.08286,
- "64": 10.03273,
- "65": 9.99833,
- "66": 10.03902,
- "67": 10.01293,
- "68": 9.97751,
- "69": 9.99331,
- "70": 9.97079,
- "71": 9.99817,
- "72": 9.97548,
- "73": 9.95979,
- "74": 9.95289,
- "75": 9.91425,
- "76": 9.9499,
- "77": 9.94212,
- "78": 9.89883,
- "79": 9.89693,
- "80": 9.91029,
- "81": 9.93356,
- "82": 9.88352,
- "83": 9.83982,
- "84": 9.78195,
- "85": 9.76266,
- "86": 9.87794,
- "87": 9.90072,
- "88": 9.87398,
- "89": 9.82485,
- "90": 9.81362,
- "91": 9.8199,
- "92": 9.81611,
- "93": 9.74343,
- "94": 9.82156,
- "95": 9.8122,
- "96": 9.79476,
- "97": 9.74624,
- "98": 9.76879,
- "99": 9.81836,
- "100": 9.7074
+ "58": 10.124,
+ "59": 10.06483,
+ "60": 10.09511,
+ "61": 10.04736,
+ "62": 10.01513,
+ "63": 10.08268,
+ "64": 10.03239,
+ "65": 9.99804,
+ "66": 10.03859,
+ "67": 10.01247,
+ "68": 9.97703,
+ "69": 9.9927,
+ "70": 9.97031,
+ "71": 9.99747,
+ "72": 9.97476,
+ "73": 9.95896,
+ "74": 9.95212,
+ "75": 9.9133,
+ "76": 9.94908,
+ "77": 9.94119,
+ "78": 9.89795,
+ "79": 9.89601,
+ "80": 9.90926,
+ "81": 9.93266,
+ "82": 9.8826,
+ "83": 9.83875,
+ "84": 9.78078,
+ "85": 9.76158,
+ "86": 9.87689,
+ "87": 9.89972,
+ "88": 9.87298,
+ "89": 9.82372,
+ "90": 9.81265,
+ "91": 9.81889,
+ "92": 9.81491,
+ "93": 9.74217,
+ "94": 9.82042,
+ "95": 9.81103,
+ "96": 9.79363,
+ "97": 9.74488,
+ "98": 9.76721,
+ "99": 9.81701,
+ "100": 9.70593
}
},
"num-zeros": {
@@ -111,106 +111,106 @@
"end_step": 100,
"step_interval": 1,
"values": {
- "1": 2589.0,
- "2": 2610.0,
- "3": 2532.0,
- "4": 2530.0,
- "5": 2535.0,
- "6": 2504.0,
- "7": 2664.0,
- "8": 2529.0,
- "9": 2641.0,
- "10": 2550.0,
- "11": 2654.0,
- "12": 2438.0,
- "13": 2617.0,
- "14": 2645.0,
- "15": 2328.0,
- "16": 2493.0,
- "17": 2550.0,
- "18": 2599.0,
- "19": 2441.0,
- "20": 2491.0,
- "21": 2583.0,
- "22": 2562.0,
- "23": 2470.0,
- "24": 2588.0,
- "25": 2439.0,
- "26": 2535.0,
- "27": 2589.0,
- "28": 2534.0,
- "29": 2637.0,
- "30": 2716.0,
- "31": 2705.0,
- "32": 2812.0,
- "33": 2835.0,
- "34": 2727.0,
- "35": 2870.0,
- "36": 2698.0,
- "37": 2921.0,
- "38": 2783.0,
- "39": 2848.0,
- "40": 3037.0,
- "41": 3154.0,
- "42": 2864.0,
- "43": 3103.0,
- "44": 3123.0,
- "45": 3271.0,
- "46": 3208.0,
- "47": 3206.0,
- "48": 3309.0,
- "49": 3457.0,
- "50": 3466.0,
- "51": 3276.0,
- "52": 3448.0,
- "53": 3254.0,
- "54": 3504.0,
- "55": 3230.0,
- "56": 3568.0,
- "57": 2933.0,
- "58": 4052.0,
- "59": 3626.0,
- "60": 3510.0,
- "61": 3371.0,
- "62": 3642.0,
- "63": 4019.0,
- "64": 4041.0,
- "65": 3371.0,
- "66": 3826.0,
- "67": 4156.0,
- "68": 3811.0,
- "69": 3545.0,
- "70": 3831.0,
- "71": 3834.0,
- "72": 3593.0,
- "73": 4098.0,
- "74": 3711.0,
- "75": 3649.0,
- "76": 3907.0,
- "77": 4118.0,
- "78": 4212.0,
- "79": 4428.0,
- "80": 33291.0,
- "81": 8226.0,
- "82": 528724.0,
- "83": 3499.0,
- "84": 31529.0,
- "85": 528713.0,
- "86": 529264.0,
- "87": 581775.0,
- "88": 529230.0,
- "89": 529270.0,
- "90": 529149.0,
- "91": 528757.0,
- "92": 529091.0,
- "93": 549748.0,
- "94": 529131.0,
- "95": 553058.0,
- "96": 560607.0,
- "97": 529708.0,
- "98": 529488.0,
- "99": 529121.0,
- "100": 529245.0
+ "1": 6427.0,
+ "2": 6618.0,
+ "3": 6705.0,
+ "4": 6626.0,
+ "5": 6454.0,
+ "6": 6215.0,
+ "7": 6854.0,
+ "8": 6253.0,
+ "9": 6519.0,
+ "10": 6579.0,
+ "11": 6610.0,
+ "12": 6245.0,
+ "13": 6667.0,
+ "14": 6918.0,
+ "15": 6294.0,
+ "16": 6413.0,
+ "17": 6473.0,
+ "18": 6473.0,
+ "19": 6481.0,
+ "20": 6284.0,
+ "21": 6610.0,
+ "22": 6553.0,
+ "23": 6354.0,
+ "24": 6699.0,
+ "25": 6464.0,
+ "26": 6614.0,
+ "27": 6724.0,
+ "28": 6671.0,
+ "29": 7037.0,
+ "30": 6976.0,
+ "31": 7135.0,
+ "32": 7146.0,
+ "33": 7088.0,
+ "34": 7123.0,
+ "35": 7319.0,
+ "36": 7225.0,
+ "37": 7638.0,
+ "38": 7696.0,
+ "39": 7778.0,
+ "40": 7985.0,
+ "41": 8138.0,
+ "42": 7526.0,
+ "43": 8067.0,
+ "44": 7962.0,
+ "45": 8660.0,
+ "46": 8468.0,
+ "47": 8513.0,
+ "48": 8547.0,
+ "49": 8878.0,
+ "50": 8823.0,
+ "51": 8750.0,
+ "52": 8942.0,
+ "53": 8470.0,
+ "54": 9274.0,
+ "55": 8387.0,
+ "56": 9552.0,
+ "57": 7729.0,
+ "58": 10444.0,
+ "59": 9320.0,
+ "60": 9455.0,
+ "61": 8934.0,
+ "62": 9447.0,
+ "63": 10085.0,
+ "64": 10049.0,
+ "65": 8632.0,
+ "66": 9644.0,
+ "67": 10241.0,
+ "68": 9905.0,
+ "69": 8978.0,
+ "70": 9730.0,
+ "71": 9629.0,
+ "72": 9249.0,
+ "73": 10081.0,
+ "74": 14397.0,
+ "75": 8917.0,
+ "76": 10143.0,
+ "77": 10427.0,
+ "78": 10760.0,
+ "79": 68696.0,
+ "80": 132664.0,
+ "81": 80159.0,
+ "82": 1117640.0,
+ "83": 67014.0,
+ "84": 1112297.0,
+ "85": 2106479.0,
+ "86": 2108092.0,
+ "87": 1279087.0,
+ "88": 2107686.0,
+ "89": 2111718.0,
+ "90": 1059710.0,
+ "91": 2106808.0,
+ "92": 2106945.0,
+ "93": 3155405.0,
+ "94": 2107876.0,
+ "95": 2155420.0,
+ "96": 2170260.0,
+ "97": 2108441.0,
+ "98": 2107668.0,
+ "99": 2107336.0,
+ "100": 2107900.0
}
},
"mem-allocated-bytes": {
@@ -327,104 +327,104 @@
"values": {
"1": 974333952.0,
"2": 1142500864.0,
- "3": 1142675968.0,
- "4": 1147437056.0,
- "5": 1147925504.0,
- "6": 1147925504.0,
- "7": 1148942336.0,
- "8": 1148942336.0,
- "9": 1148942336.0,
- "10": 1148942336.0,
- "11": 1148942336.0,
- "12": 1148942336.0,
- "13": 1148942336.0,
- "14": 1148942336.0,
- "15": 1148942336.0,
- "16": 1148942336.0,
- "17": 1148942336.0,
- "18": 1148942336.0,
- "19": 1148942336.0,
- "20": 1148942336.0,
- "21": 1148942336.0,
- "22": 1148942336.0,
- "23": 1148942336.0,
- "24": 1148942336.0,
- "25": 1148942336.0,
- "26": 1149713920.0,
- "27": 1149713920.0,
- "28": 1149713920.0,
- "29": 1149713920.0,
- "30": 1149713920.0,
- "31": 1149713920.0,
- "32": 1149713920.0,
- "33": 1149713920.0,
- "34": 1149713920.0,
- "35": 1149713920.0,
- "36": 1149713920.0,
- "37": 1149713920.0,
- "38": 1149713920.0,
- "39": 1149713920.0,
- "40": 1149713920.0,
- "41": 1149713920.0,
- "42": 1149713920.0,
- "43": 1149713920.0,
- "44": 1149713920.0,
- "45": 1149713920.0,
- "46": 1149713920.0,
- "47": 1149713920.0,
- "48": 1149713920.0,
- "49": 1149713920.0,
- "50": 1149713920.0,
- "51": 1149713920.0,
- "52": 1149713920.0,
- "53": 1149713920.0,
- "54": 1149713920.0,
- "55": 1149713920.0,
- "56": 1149713920.0,
- "57": 1149713920.0,
- "58": 1149713920.0,
- "59": 1149713920.0,
- "60": 1149713920.0,
- "61": 1149713920.0,
- "62": 1149713920.0,
- "63": 1149713920.0,
- "64": 1149713920.0,
- "65": 1149713920.0,
- "66": 1149713920.0,
- "67": 1149713920.0,
- "68": 1149713920.0,
- "69": 1149713920.0,
- "70": 1149713920.0,
- "71": 1149713920.0,
- "72": 1149713920.0,
- "73": 1149713920.0,
- "74": 1149713920.0,
- "75": 1149713920.0,
- "76": 1149713920.0,
- "77": 1149713920.0,
- "78": 1149713920.0,
- "79": 1149713920.0,
- "80": 1149713920.0,
- "81": 1149713920.0,
- "82": 1149713920.0,
- "83": 1149713920.0,
- "84": 1149713920.0,
- "85": 1149713920.0,
- "86": 1149713920.0,
- "87": 1149713920.0,
- "88": 1149713920.0,
- "89": 1149713920.0,
- "90": 1149713920.0,
- "91": 1149713920.0,
- "92": 1149713920.0,
- "93": 1149713920.0,
- "94": 1149713920.0,
- "95": 1149713920.0,
- "96": 1149713920.0,
- "97": 1149713920.0,
- "98": 1149713920.0,
- "99": 1149713920.0,
- "100": 1149713920.0
+ "3": 1142671872.0,
+ "4": 1147373568.0,
+ "5": 1147845632.0,
+ "6": 1147845632.0,
+ "7": 1148584448.0,
+ "8": 1148584448.0,
+ "9": 1148584448.0,
+ "10": 1148584448.0,
+ "11": 1148584448.0,
+ "12": 1148584448.0,
+ "13": 1148584448.0,
+ "14": 1148584448.0,
+ "15": 1148584448.0,
+ "16": 1148584448.0,
+ "17": 1148584448.0,
+ "18": 1148584448.0,
+ "19": 1148584448.0,
+ "20": 1148584448.0,
+ "21": 1148584448.0,
+ "22": 1148584448.0,
+ "23": 1148584448.0,
+ "24": 1148584448.0,
+ "25": 1148584448.0,
+ "26": 1148584448.0,
+ "27": 1148584448.0,
+ "28": 1148584448.0,
+ "29": 1148584448.0,
+ "30": 1148584448.0,
+ "31": 1148584448.0,
+ "32": 1148584448.0,
+ "33": 1148584448.0,
+ "34": 1148584448.0,
+ "35": 1148595200.0,
+ "36": 1148595200.0,
+ "37": 1148595200.0,
+ "38": 1148595200.0,
+ "39": 1148595200.0,
+ "40": 1148595200.0,
+ "41": 1148595200.0,
+ "42": 1148595200.0,
+ "43": 1148595200.0,
+ "44": 1148595200.0,
+ "45": 1148595200.0,
+ "46": 1148595200.0,
+ "47": 1148595200.0,
+ "48": 1148595200.0,
+ "49": 1148595200.0,
+ "50": 1148595200.0,
+ "51": 1148595200.0,
+ "52": 1148595200.0,
+ "53": 1148595200.0,
+ "54": 1148595200.0,
+ "55": 1148595200.0,
+ "56": 1148595200.0,
+ "57": 1148595200.0,
+ "58": 1148595200.0,
+ "59": 1148595200.0,
+ "60": 1148595200.0,
+ "61": 1148595200.0,
+ "62": 1148595200.0,
+ "63": 1148595200.0,
+ "64": 1148595200.0,
+ "65": 1148595200.0,
+ "66": 1148595200.0,
+ "67": 1148595200.0,
+ "68": 1148595200.0,
+ "69": 1148595200.0,
+ "70": 1148595200.0,
+ "71": 1148595200.0,
+ "72": 1148595200.0,
+ "73": 1148595200.0,
+ "74": 1148595200.0,
+ "75": 1148595200.0,
+ "76": 1148595200.0,
+ "77": 1148595200.0,
+ "78": 1148595200.0,
+ "79": 1148595200.0,
+ "80": 1148595200.0,
+ "81": 1148595200.0,
+ "82": 1148595200.0,
+ "83": 1148595200.0,
+ "84": 1148595200.0,
+ "85": 1148595200.0,
+ "86": 1148595200.0,
+ "87": 1148595200.0,
+ "88": 1148595200.0,
+ "89": 1148595200.0,
+ "90": 1148595200.0,
+ "91": 1148595200.0,
+ "92": 1148595200.0,
+ "93": 1148595200.0,
+ "94": 1148595200.0,
+ "95": 1148595200.0,
+ "96": 1148595200.0,
+ "97": 1148595200.0,
+ "98": 1148595200.0,
+ "99": 1148595200.0,
+ "100": 1148595200.0
}
},
"iteration-time": {
@@ -433,105 +433,105 @@
"step_interval": 1,
"values": {
"1": "nan",
- "2": 11.7836,
- "3": 0.58975,
- "4": 0.56544,
- "5": 0.5504,
- "6": 0.56842,
- "7": 0.5491,
- "8": 0.54138,
- "9": 0.53371,
- "10": 0.5342,
- "11": 0.53224,
- "12": 0.52891,
- "13": 0.52976,
- "14": 0.53162,
- "15": 0.52297,
- "16": 0.52336,
- "17": 0.52793,
- "18": 0.52225,
- "19": 0.52121,
- "20": 0.52937,
- "21": 0.53168,
- "22": 0.52349,
- "23": 0.52045,
- "24": 0.53318,
- "25": 0.52745,
- "26": 0.51972,
- "27": 0.52474,
- "28": 0.53885,
- "29": 0.54406,
- "30": 0.52979,
- "31": 0.52273,
- "32": 0.52354,
- "33": 0.52179,
- "34": 0.52809,
- "35": 0.52207,
- "36": 0.52789,
- "37": 0.51996,
- "38": 0.53223,
- "39": 0.52549,
- "40": 0.53308,
- "41": 0.53147,
- "42": 0.53153,
- "43": 0.5292,
- "44": 0.52056,
- "45": 0.52578,
- "46": 0.51549,
- "47": 0.51842,
- "48": 0.51917,
- "49": 0.52488,
- "50": 0.52255,
- "51": 0.64477,
- "52": 0.51979,
- "53": 0.52383,
- "54": 0.52192,
- "55": 0.51931,
- "56": 0.51907,
- "57": 0.52009,
- "58": 0.51807,
- "59": 0.51736,
- "60": 0.51892,
- "61": 0.51809,
- "62": 0.52089,
- "63": 0.52315,
- "64": 0.51504,
- "65": 0.51491,
- "66": 0.51739,
- "67": 0.51455,
- "68": 0.51564,
- "69": 1.04071,
- "70": 0.5162,
- "71": 0.51607,
- "72": 0.5156,
- "73": 0.51835,
- "74": 0.51882,
- "75": 0.52265,
- "76": 0.51863,
- "77": 0.51483,
- "78": 0.51774,
- "79": 0.52634,
- "80": 0.52171,
- "81": 0.52135,
- "82": 0.52168,
- "83": 0.53375,
- "84": 0.51785,
- "85": 0.52358,
- "86": 0.51614,
- "87": 0.52652,
- "88": 0.51691,
- "89": 0.51638,
- "90": 0.52191,
- "91": 0.51655,
- "92": 0.51846,
- "93": 0.51379,
- "94": 0.51835,
- "95": 0.91609,
- "96": 0.51869,
- "97": 0.51813,
- "98": 0.5255,
- "99": 0.52418,
- "100": 0.53762
+ "2": 8.7306,
+ "3": 0.82541,
+ "4": 0.79111,
+ "5": 0.78772,
+ "6": 0.78491,
+ "7": 0.77321,
+ "8": 0.80845,
+ "9": 0.76281,
+ "10": 0.76741,
+ "11": 0.76405,
+ "12": 0.7464,
+ "13": 0.74032,
+ "14": 0.74249,
+ "15": 0.7361,
+ "16": 0.73487,
+ "17": 0.72656,
+ "18": 0.73602,
+ "19": 0.72939,
+ "20": 0.72896,
+ "21": 0.7316,
+ "22": 0.73357,
+ "23": 0.72972,
+ "24": 0.73707,
+ "25": 0.73966,
+ "26": 0.719,
+ "27": 0.72924,
+ "28": 0.74616,
+ "29": 0.75162,
+ "30": 0.75031,
+ "31": 0.74663,
+ "32": 0.73337,
+ "33": 0.73723,
+ "34": 0.73465,
+ "35": 0.73771,
+ "36": 0.7385,
+ "37": 0.73536,
+ "38": 0.74515,
+ "39": 0.73575,
+ "40": 0.74509,
+ "41": 0.73501,
+ "42": 0.74091,
+ "43": 0.74268,
+ "44": 0.73316,
+ "45": 0.7359,
+ "46": 0.72733,
+ "47": 0.73408,
+ "48": 0.73042,
+ "49": 0.73455,
+ "50": 0.72958,
+ "51": 0.8591,
+ "52": 0.81718,
+ "53": 0.74131,
+ "54": 0.74839,
+ "55": 0.74974,
+ "56": 0.75244,
+ "57": 0.74244,
+ "58": 0.73823,
+ "59": 0.74268,
+ "60": 0.74576,
+ "61": 0.74499,
+ "62": 0.74408,
+ "63": 0.74442,
+ "64": 0.74569,
+ "65": 0.73634,
+ "66": 0.74134,
+ "67": 1.30864,
+ "68": 0.74506,
+ "69": 0.7469,
+ "70": 0.73887,
+ "71": 0.74595,
+ "72": 0.73832,
+ "73": 0.73662,
+ "74": 0.74627,
+ "75": 0.75627,
+ "76": 0.74451,
+ "77": 0.73734,
+ "78": 0.73831,
+ "79": 0.74279,
+ "80": 0.74483,
+ "81": 0.74523,
+ "82": 0.7475,
+ "83": 0.75273,
+ "84": 0.74267,
+ "85": 0.73974,
+ "86": 0.73832,
+ "87": 0.74642,
+ "88": 0.73886,
+ "89": 0.73962,
+ "90": 0.82905,
+ "91": 0.73775,
+ "92": 0.7538,
+ "93": 0.75623,
+ "94": 0.74641,
+ "95": 0.74354,
+ "96": 0.73224,
+ "97": 0.73277,
+ "98": 0.73692,
+ "99": 0.73794,
+ "100": 0.73356
}
}
}
\ No newline at end of file
diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100.json
index 13107c98731..ba5070c3f7d 100644
--- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100.json
+++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100.json
@@ -6,104 +6,104 @@
"values": {
"1": 10.95659,
"2": 10.95273,
- "3": 10.97293,
- "4": 10.95528,
- "5": 10.95882,
- "6": 10.96034,
- "7": 10.94954,
- "8": 10.95612,
- "9": 10.96238,
- "10": 10.95507,
- "11": 10.94969,
- "12": 10.94911,
- "13": 10.94317,
- "14": 10.93962,
- "15": 10.913,
- "16": 10.89317,
- "17": 10.89415,
- "18": 10.8839,
- "19": 10.88757,
- "20": 10.81652,
- "21": 10.77937,
- "22": 10.77934,
- "23": 10.75133,
- "24": 10.73697,
- "25": 10.70906,
- "26": 10.70254,
- "27": 10.66656,
- "28": 10.58983,
- "29": 10.57516,
- "30": 10.5394,
- "31": 10.54957,
- "32": 10.49421,
- "33": 10.45586,
- "34": 10.45429,
- "35": 10.41583,
- "36": 10.40492,
- "37": 10.37411,
- "38": 10.38053,
- "39": 10.33652,
- "40": 10.33756,
- "41": 10.29221,
- "42": 10.24553,
- "43": 10.23799,
- "44": 10.20506,
- "45": 10.23982,
- "46": 10.1679,
- "47": 10.16495,
- "48": 10.11261,
- "49": 10.11907,
- "50": 10.09877,
- "51": 10.11395,
- "52": 10.07035,
- "53": 10.03508,
- "54": 10.01882,
- "55": 9.97026,
- "56": 10.01777,
- "57": 10.00232,
- "58": 10.00899,
- "59": 9.93742,
- "60": 9.97734,
- "61": 9.92095,
- "62": 9.86099,
- "63": 9.97248,
- "64": 9.91755,
- "65": 9.8803,
- "66": 9.90574,
- "67": 9.88904,
- "68": 9.81432,
- "69": 9.83828,
- "70": 9.82923,
- "71": 9.85446,
- "72": 9.84568,
- "73": 9.79826,
- "74": 9.79391,
- "75": 9.74221,
- "76": 9.8121,
- "77": 9.8087,
- "78": 9.76164,
- "79": 9.73731,
- "80": 9.76093,
- "81": 9.80125,
- "82": 9.72437,
- "83": 9.66548,
- "84": 9.62666,
- "85": 9.59103,
+ "3": 10.97332,
+ "4": 10.95546,
+ "5": 10.95888,
+ "6": 10.96097,
+ "7": 10.94955,
+ "8": 10.95621,
+ "9": 10.96206,
+ "10": 10.95524,
+ "11": 10.94894,
+ "12": 10.94925,
+ "13": 10.9431,
+ "14": 10.93961,
+ "15": 10.91324,
+ "16": 10.89338,
+ "17": 10.89379,
+ "18": 10.88434,
+ "19": 10.88764,
+ "20": 10.81674,
+ "21": 10.77913,
+ "22": 10.77926,
+ "23": 10.75105,
+ "24": 10.73706,
+ "25": 10.70916,
+ "26": 10.70265,
+ "27": 10.66696,
+ "28": 10.59021,
+ "29": 10.5754,
+ "30": 10.53952,
+ "31": 10.54926,
+ "32": 10.49446,
+ "33": 10.45604,
+ "34": 10.45455,
+ "35": 10.41632,
+ "36": 10.40498,
+ "37": 10.37442,
+ "38": 10.38102,
+ "39": 10.33659,
+ "40": 10.3377,
+ "41": 10.29213,
+ "42": 10.24631,
+ "43": 10.23808,
+ "44": 10.20535,
+ "45": 10.24008,
+ "46": 10.16857,
+ "47": 10.1652,
+ "48": 10.11327,
+ "49": 10.11958,
+ "50": 10.09962,
+ "51": 10.11422,
+ "52": 10.07093,
+ "53": 10.03567,
+ "54": 10.01915,
+ "55": 9.97095,
+ "56": 10.01829,
+ "57": 10.00271,
+ "58": 10.0092,
+ "59": 9.93771,
+ "60": 9.97758,
+ "61": 9.92087,
+ "62": 9.86126,
+ "63": 9.97292,
+ "64": 9.91782,
+ "65": 9.88058,
+ "66": 9.90562,
+ "67": 9.8895,
+ "68": 9.81454,
+ "69": 9.83864,
+ "70": 9.82955,
+ "71": 9.855,
+ "72": 9.84555,
+ "73": 9.79876,
+ "74": 9.79422,
+ "75": 9.74264,
+ "76": 9.81239,
+ "77": 9.80893,
+ "78": 9.76169,
+ "79": 9.7373,
+ "80": 9.76111,
+ "81": 9.80122,
+ "82": 9.72383,
+ "83": 9.66576,
+ "84": 9.62658,
+ "85": 9.59095,
"86": 9.7377,
- "87": 9.72698,
- "88": 9.73452,
- "89": 9.63568,
- "90": 9.62951,
- "91": 9.6743,
- "92": 9.63781,
- "93": 9.53741,
- "94": 9.65643,
- "95": 9.62904,
- "96": 9.63411,
- "97": 9.54632,
- "98": 9.59572,
- "99": 9.64181,
- "100": 9.53588
+ "87": 9.72693,
+ "88": 9.7346,
+ "89": 9.63566,
+ "90": 9.62949,
+ "91": 9.67372,
+ "92": 9.63811,
+ "93": 9.53749,
+ "94": 9.65631,
+ "95": 9.62893,
+ "96": 9.63437,
+ "97": 9.5463,
+ "98": 9.59551,
+ "99": 9.64184,
+ "100": 9.53486
}
},
"num-zeros": {
@@ -111,106 +111,106 @@
"end_step": 100,
"step_interval": 1,
"values": {
- "1": 22985512.0,
- "2": 22866856.0,
- "3": 22718796.0,
- "4": 22793112.0,
- "5": 22800332.0,
- "6": 22758732.0,
- "7": 22889360.0,
- "8": 22616952.0,
- "9": 22770544.0,
- "10": 22482356.0,
- "11": 22768012.0,
- "12": 22646636.0,
- "13": 23376168.0,
- "14": 23020836.0,
- "15": 22728468.0,
- "16": 22844216.0,
- "17": 22956308.0,
- "18": 23025414.0,
- "19": 23121784.0,
- "20": 22737782.0,
- "21": 22939100.0,
- "22": 22975384.0,
- "23": 22636422.0,
- "24": 22885688.0,
- "25": 22646604.0,
- "26": 23036340.0,
- "27": 22820192.0,
- "28": 23031660.0,
- "29": 23007710.0,
- "30": 22978120.0,
- "31": 22931544.0,
- "32": 22671892.0,
- "33": 22753852.0,
- "34": 23115320.0,
- "35": 22764186.0,
- "36": 22708208.0,
- "37": 23140396.0,
- "38": 22990976.0,
- "39": 23017508.0,
- "40": 22766752.0,
- "41": 23101556.0,
- "42": 22700170.0,
- "43": 23019194.0,
- "44": 22716630.0,
- "45": 22868256.0,
- "46": 22743362.0,
- "47": 22871888.0,
- "48": 22852526.0,
- "49": 22908212.0,
- "50": 22654480.0,
- "51": 22713796.0,
- "52": 22833128.0,
- "53": 22987656.0,
- "54": 22807024.0,
- "55": 22950740.0,
- "56": 22669944.0,
- "57": 23234316.0,
- "58": 22699600.0,
- "59": 22862426.0,
- "60": 23046704.0,
- "61": 22688294.0,
- "62": 22743112.0,
- "63": 22643864.0,
- "64": 23031820.0,
- "65": 23243684.0,
- "66": 22705280.0,
- "67": 22986366.0,
- "68": 22949460.0,
- "69": 23193560.0,
- "70": 22838360.0,
- "71": 22750350.0,
- "72": 23155256.0,
- "73": 23168624.0,
- "74": 22970414.0,
- "75": 22903392.0,
- "76": 22714040.0,
- "77": 23011804.0,
- "78": 23010392.0,
- "79": 22845544.0,
- "80": 22958276.0,
- "81": 22850234.0,
- "82": 22746280.0,
- "83": 22741604.0,
- "84": 23135624.0,
- "85": 22945892.0,
- "86": 23108160.0,
- "87": 22369104.0,
- "88": 22565104.0,
- "89": 22738008.0,
- "90": 22782056.0,
- "91": 22941128.0,
- "92": 22680628.0,
- "93": 22647880.0,
- "94": 23168946.0,
- "95": 22702252.0,
- "96": 22867296.0,
- "97": 22852594.0,
- "98": 22897226.0,
- "99": 22645712.0,
- "100": 23029136.0
+ "1": 22986336.0,
+ "2": 22867800.0,
+ "3": 22719788.0,
+ "4": 22794016.0,
+ "5": 22801176.0,
+ "6": 22759778.0,
+ "7": 22890350.0,
+ "8": 22617894.0,
+ "9": 22771488.0,
+ "10": 22483272.0,
+ "11": 22768864.0,
+ "12": 22647492.0,
+ "13": 23377228.0,
+ "14": 23021960.0,
+ "15": 22729568.0,
+ "16": 22845228.0,
+ "17": 22957312.0,
+ "18": 23026388.0,
+ "19": 23122824.0,
+ "20": 22738710.0,
+ "21": 22939894.0,
+ "22": 22976252.0,
+ "23": 22637356.0,
+ "24": 22886552.0,
+ "25": 22647580.0,
+ "26": 23037294.0,
+ "27": 22821206.0,
+ "28": 23032666.0,
+ "29": 23008740.0,
+ "30": 22979150.0,
+ "31": 22932438.0,
+ "32": 22672848.0,
+ "33": 22754864.0,
+ "34": 23116486.0,
+ "35": 22766216.0,
+ "36": 22709636.0,
+ "37": 23141720.0,
+ "38": 22992540.0,
+ "39": 23019594.0,
+ "40": 22767810.0,
+ "41": 23106740.0,
+ "42": 23749792.0,
+ "43": 24068872.0,
+ "44": 22717204.0,
+ "45": 22869462.0,
+ "46": 23792952.0,
+ "47": 22873204.0,
+ "48": 22853536.0,
+ "49": 23957960.0,
+ "50": 23704312.0,
+ "51": 23763734.0,
+ "52": 23883638.0,
+ "53": 24037404.0,
+ "54": 23856864.0,
+ "55": 24001098.0,
+ "56": 23720376.0,
+ "57": 24284706.0,
+ "58": 23749464.0,
+ "59": 23913848.0,
+ "60": 24098614.0,
+ "61": 23739552.0,
+ "62": 22746136.0,
+ "63": 24743096.0,
+ "64": 24081656.0,
+ "65": 24297808.0,
+ "66": 23760908.0,
+ "67": 24037080.0,
+ "68": 25048560.0,
+ "69": 24243456.0,
+ "70": 23892164.0,
+ "71": 24848832.0,
+ "72": 24205880.0,
+ "73": 24221284.0,
+ "74": 25068664.0,
+ "75": 23957416.0,
+ "76": 23764968.0,
+ "77": 25110192.0,
+ "78": 24061600.0,
+ "79": 23895220.0,
+ "80": 24008140.0,
+ "81": 23905368.0,
+ "82": 23796512.0,
+ "83": 22742706.0,
+ "84": 24186848.0,
+ "85": 23995992.0,
+ "86": 24180432.0,
+ "87": 23419392.0,
+ "88": 23615546.0,
+ "89": 23787832.0,
+ "90": 23832072.0,
+ "91": 23991824.0,
+ "92": 23731324.0,
+ "93": 22649044.0,
+ "94": 24219140.0,
+ "95": 22705788.0,
+ "96": 23918516.0,
+ "97": 23902516.0,
+ "98": 22898480.0,
+ "99": 23695706.0,
+ "100": 24079564.0
}
},
"mem-allocated-bytes": {
@@ -218,106 +218,106 @@
"end_step": 100,
"step_interval": 1,
"values": {
- "1": 804504064.0,
- "2": 766036992.0,
- "3": 844984320.0,
- "4": 801470464.0,
- "5": 808671232.0,
- "6": 808818688.0,
- "7": 804558848.0,
- "8": 801470464.0,
- "9": 809105408.0,
- "10": 807352320.0,
+ "1": 815727104.0,
+ "2": 787672064.0,
+ "3": 804506624.0,
+ "4": 807958528.0,
+ "5": 808808448.0,
+ "6": 808817664.0,
+ "7": 808515584.0,
+ "8": 807352320.0,
+ "9": 807352320.0,
+ "10": 804616192.0,
"11": 801470464.0,
- "12": 809530368.0,
- "13": 808635392.0,
- "14": 801470464.0,
- "15": 808386560.0,
- "16": 804108288.0,
- "17": 809097216.0,
+ "12": 801470464.0,
+ "13": 807958528.0,
+ "14": 809416704.0,
+ "15": 808757248.0,
+ "16": 808307712.0,
+ "17": 804108288.0,
"18": 801470464.0,
"19": 801470464.0,
"20": 801470464.0,
- "21": 808868864.0,
- "22": 808409088.0,
- "23": 808865792.0,
- "24": 801470464.0,
+ "21": 809096192.0,
+ "22": 808957952.0,
+ "23": 804558848.0,
+ "24": 808546304.0,
"25": 801470464.0,
- "26": 807958528.0,
- "27": 808450048.0,
- "28": 808056832.0,
- "29": 801470464.0,
- "30": 808410112.0,
- "31": 808336384.0,
- "32": 804922368.0,
- "33": 801470464.0,
- "34": 808819712.0,
- "35": 809051136.0,
- "36": 801470464.0,
- "37": 808127488.0,
- "38": 808793088.0,
- "39": 807352320.0,
+ "26": 801470464.0,
+ "27": 808245248.0,
+ "28": 801470464.0,
+ "29": 804977664.0,
+ "30": 801470464.0,
+ "31": 801470464.0,
+ "32": 801470464.0,
+ "33": 809012224.0,
+ "34": 807958528.0,
+ "35": 807958528.0,
+ "36": 808537088.0,
+ "37": 804976640.0,
+ "38": 804354048.0,
+ "39": 804157440.0,
"40": 801470464.0,
"41": 801470464.0,
- "42": 808155136.0,
- "43": 808081408.0,
- "44": 801470464.0,
- "45": 808409088.0,
- "46": 808429568.0,
- "47": 807985152.0,
- "48": 804370432.0,
- "49": 801470464.0,
- "50": 808466432.0,
+ "42": 801470464.0,
+ "43": 801470464.0,
+ "44": 808138752.0,
+ "45": 808515584.0,
+ "46": 808938496.0,
+ "47": 807837696.0,
+ "48": 807352320.0,
+ "49": 807676928.0,
+ "50": 808089600.0,
"51": 801470464.0,
"52": 801470464.0,
- "53": 808466432.0,
- "54": 808409088.0,
- "55": 807352320.0,
- "56": 804558848.0,
+ "53": 801470464.0,
+ "54": 801470464.0,
+ "55": 801470464.0,
+ "56": 808879104.0,
"57": 801470464.0,
- "58": 808627200.0,
- "59": 808847360.0,
- "60": 808333312.0,
- "61": 804354048.0,
- "62": 801470464.0,
- "63": 808409088.0,
- "64": 808681472.0,
- "65": 808024064.0,
- "66": 808409088.0,
- "67": 805165056.0,
+ "58": 807958528.0,
+ "59": 807352320.0,
+ "60": 804780032.0,
+ "61": 804108288.0,
+ "62": 804108288.0,
+ "63": 801470464.0,
+ "64": 804558848.0,
+ "65": 804108288.0,
+ "66": 801470464.0,
+ "67": 801470464.0,
"68": 801470464.0,
- "69": 808627200.0,
- "70": 808187904.0,
- "71": 808409088.0,
- "72": 807802880.0,
+ "69": 801470464.0,
+ "70": 801470464.0,
+ "71": 801470464.0,
+ "72": 801470464.0,
"73": 801470464.0,
"74": 801470464.0,
- "75": 808377344.0,
- "76": 809356288.0,
- "77": 804843520.0,
+ "75": 801470464.0,
+ "76": 801470464.0,
+ "77": 801470464.0,
"78": 801470464.0,
- "79": 808267776.0,
- "80": 805473280.0,
+ "79": 801470464.0,
+ "80": 801470464.0,
"81": 801470464.0,
- "82": 808417280.0,
- "83": 808309760.0,
- "84": 801470464.0,
- "85": 808409088.0,
- "86": 808409088.0,
+ "82": 801470464.0,
+ "83": 808409088.0,
+ "84": 808409088.0,
+ "85": 801470464.0,
+ "86": 801470464.0,
"87": 801470464.0,
- "88": 807958528.0,
- "89": 808089600.0,
- "90": 801470464.0,
- "91": 808293376.0,
- "92": 807958528.0,
- "93": 807663616.0,
- "94": 801470464.0,
- "95": 808237056.0,
- "96": 809064448.0,
- "97": 801470464.0,
- "98": 801470464.0,
- "99": 808598528.0,
- "100": 808688640.0
+ "88": 809389056.0,
+ "89": 808253440.0,
+ "90": 807802880.0,
+ "91": 807418880.0,
+ "92": 807352320.0,
+ "93": 807802880.0,
+ "94": 807802880.0,
+ "95": 805222400.0,
+ "96": 804149248.0,
+ "97": 804108288.0,
+ "98": 804108288.0,
+ "99": 804558848.0,
+ "100": 801470464.0
}
},
"mem-max-allocated-bytes": {
@@ -325,106 +325,106 @@
"end_step": 100,
"step_interval": 1,
"values": {
- "1": 978379776.0,
- "2": 1181844992.0,
- "3": 1184461312.0,
- "4": 1184461312.0,
- "5": 1184461312.0,
- "6": 1184461312.0,
- "7": 1184461312.0,
- "8": 1184461312.0,
- "9": 1184461312.0,
- "10": 1184461312.0,
- "11": 1184461312.0,
- "12": 1184461312.0,
- "13": 1184461312.0,
- "14": 1184461312.0,
- "15": 1184461312.0,
- "16": 1184461312.0,
- "17": 1184461312.0,
- "18": 1184461312.0,
- "19": 1184461312.0,
- "20": 1184461312.0,
- "21": 1184461312.0,
- "22": 1184461312.0,
- "23": 1184461312.0,
- "24": 1184461312.0,
- "25": 1184461312.0,
- "26": 1184461312.0,
- "27": 1184461312.0,
- "28": 1184461312.0,
- "29": 1184461312.0,
- "30": 1184461312.0,
- "31": 1184461312.0,
- "32": 1184461312.0,
- "33": 1184461312.0,
- "34": 1184461312.0,
- "35": 1184461312.0,
- "36": 1184461312.0,
- "37": 1184461312.0,
- "38": 1184461312.0,
- "39": 1184461312.0,
- "40": 1184461312.0,
- "41": 1184461312.0,
- "42": 1184461312.0,
- "43": 1184461312.0,
- "44": 1184461312.0,
- "45": 1184461312.0,
- "46": 1184461312.0,
- "47": 1184461312.0,
- "48": 1184461312.0,
- "49": 1184461312.0,
- "50": 1184461312.0,
- "51": 1184461312.0,
- "52": 1184461312.0,
- "53": 1184461312.0,
- "54": 1184461312.0,
- "55": 1184461312.0,
- "56": 1184461312.0,
- "57": 1184461312.0,
- "58": 1184461312.0,
- "59": 1184461312.0,
- "60": 1184461312.0,
- "61": 1184461312.0,
- "62": 1184461312.0,
- "63": 1184461312.0,
- "64": 1184461312.0,
- "65": 1184461312.0,
- "66": 1184461312.0,
- "67": 1184461312.0,
- "68": 1184461312.0,
- "69": 1184461312.0,
- "70": 1184461312.0,
- "71": 1184461312.0,
- "72": 1184461312.0,
- "73": 1184461312.0,
- "74": 1184461312.0,
- "75": 1184461312.0,
- "76": 1184461312.0,
- "77": 1184461312.0,
- "78": 1184461312.0,
- "79": 1184461312.0,
- "80": 1184461312.0,
- "81": 1184461312.0,
- "82": 1184461312.0,
- "83": 1184461312.0,
- "84": 1184461312.0,
- "85": 1184461312.0,
- "86": 1184461312.0,
- "87": 1184461312.0,
- "88": 1184461312.0,
- "89": 1184461312.0,
- "90": 1184461312.0,
- "91": 1184461312.0,
- "92": 1184461312.0,
- "93": 1184461312.0,
- "94": 1184461312.0,
- "95": 1184461312.0,
- "96": 1184461312.0,
- "97": 1184461312.0,
- "98": 1184461312.0,
- "99": 1184461312.0,
- "100": 1184461312.0
+ "1": 957495296.0,
+ "2": 1172456960.0,
+ "3": 1178258432.0,
+ "4": 1178258432.0,
+ "5": 1178258432.0,
+ "6": 1178258432.0,
+ "7": 1178258432.0,
+ "8": 1178258432.0,
+ "9": 1178258432.0,
+ "10": 1178258432.0,
+ "11": 1178258432.0,
+ "12": 1178258432.0,
+ "13": 1178258432.0,
+ "14": 1178258432.0,
+ "15": 1178258432.0,
+ "16": 1178258432.0,
+ "17": 1178258432.0,
+ "18": 1178258432.0,
+ "19": 1178258432.0,
+ "20": 1178258432.0,
+ "21": 1178258432.0,
+ "22": 1178258432.0,
+ "23": 1178258432.0,
+ "24": 1178258432.0,
+ "25": 1178258432.0,
+ "26": 1178258432.0,
+ "27": 1178258432.0,
+ "28": 1178258432.0,
+ "29": 1178258432.0,
+ "30": 1178258432.0,
+ "31": 1178258432.0,
+ "32": 1178258432.0,
+ "33": 1178258432.0,
+ "34": 1178258432.0,
+ "35": 1178258432.0,
+ "36": 1178258432.0,
+ "37": 1178258432.0,
+ "38": 1178258432.0,
+ "39": 1178258432.0,
+ "40": 1178258432.0,
+ "41": 1178258432.0,
+ "42": 1178258432.0,
+ "43": 1178258432.0,
+ "44": 1178258432.0,
+ "45": 1178258432.0,
+ "46": 1178258432.0,
+ "47": 1178258432.0,
+ "48": 1178258432.0,
+ "49": 1178258432.0,
+ "50": 1178258432.0,
+ "51": 1178258432.0,
+ "52": 1178258432.0,
+ "53": 1178258432.0,
+ "54": 1178258432.0,
+ "55": 1178258432.0,
+ "56": 1178258432.0,
+ "57": 1178258432.0,
+ "58": 1178258432.0,
+ "59": 1178258432.0,
+ "60": 1178258432.0,
+ "61": 1178258432.0,
+ "62": 1178258432.0,
+ "63": 1178258432.0,
+ "64": 1178258432.0,
+ "65": 1178258432.0,
+ "66": 1178258432.0,
+ "67": 1178258432.0,
+ "68": 1178258432.0,
+ "69": 1178258432.0,
+ "70": 1178258432.0,
+ "71": 1178258432.0,
+ "72": 1178258432.0,
+ "73": 1178258432.0,
+ "74": 1178258432.0,
+ "75": 1178258432.0,
+ "76": 1178258432.0,
+ "77": 1178258432.0,
+ "78": 1178258432.0,
+ "79": 1178258432.0,
+ "80": 1178258432.0,
+ "81": 1178258432.0,
+ "82": 1178258432.0,
+ "83": 1178258432.0,
+ "84": 1178258432.0,
+ "85": 1178258432.0,
+ "86": 1178258432.0,
+ "87": 1178258432.0,
+ "88": 1178258432.0,
+ "89": 1178258432.0,
+ "90": 1178258432.0,
+ "91": 1178258432.0,
+ "92": 1178258432.0,
+ "93": 1178258432.0,
+ "94": 1178258432.0,
+ "95": 1178258432.0,
+ "96": 1178258432.0,
+ "97": 1178258432.0,
+ "98": 1178258432.0,
+ "99": 1178258432.0,
+ "100": 1178258432.0
}
},
"mtp_1 loss": {
@@ -434,104 +434,104 @@
"values": {
"1": 10.91546,
"2": 10.92323,
- "3": 10.93384,
- "4": 10.92739,
- "5": 10.90724,
- "6": 10.91817,
- "7": 10.92486,
- "8": 10.92528,
- "9": 10.93457,
- "10": 10.9265,
- "11": 10.91896,
- "12": 10.91863,
- "13": 10.92808,
- "14": 10.91206,
- "15": 10.91984,
- "16": 10.92451,
- "17": 10.92227,
- "18": 10.90737,
- "19": 10.91483,
- "20": 10.90522,
- "21": 10.9114,
- "22": 10.89772,
- "23": 10.90537,
- "24": 10.89029,
- "25": 10.89787,
- "26": 10.88468,
+ "3": 10.93397,
+ "4": 10.92751,
+ "5": 10.90737,
+ "6": 10.91848,
+ "7": 10.92458,
+ "8": 10.92554,
+ "9": 10.93463,
+ "10": 10.92668,
+ "11": 10.91878,
+ "12": 10.9187,
+ "13": 10.92838,
+ "14": 10.91232,
+ "15": 10.92007,
+ "16": 10.92421,
+ "17": 10.92201,
+ "18": 10.90719,
+ "19": 10.91465,
+ "20": 10.90473,
+ "21": 10.91184,
+ "22": 10.89773,
+ "23": 10.90467,
+ "24": 10.88981,
+ "25": 10.89799,
+ "26": 10.88458,
"27": 10.89842,
- "28": 10.8909,
- "29": 10.87535,
- "30": 10.88065,
- "31": 10.87294,
- "32": 10.87864,
- "33": 10.86983,
- "34": 10.86798,
- "35": 10.85949,
- "36": 10.8618,
- "37": 10.85516,
- "38": 10.85688,
- "39": 10.84912,
- "40": 10.86276,
- "41": 10.85336,
- "42": 10.84776,
- "43": 10.8455,
- "44": 10.83817,
- "45": 10.84937,
- "46": 10.83807,
- "47": 10.83805,
- "48": 10.83108,
- "49": 10.82947,
- "50": 10.82233,
- "51": 10.82166,
- "52": 10.82114,
- "53": 10.8067,
- "54": 10.8107,
- "55": 10.79431,
- "56": 10.79976,
- "57": 10.78946,
- "58": 10.79833,
- "59": 10.78093,
- "60": 10.77476,
- "61": 10.77647,
- "62": 10.76099,
- "63": 10.78365,
- "64": 10.75478,
- "65": 10.75021,
- "66": 10.75701,
- "67": 10.73475,
- "68": 10.72894,
- "69": 10.72604,
- "70": 10.72547,
- "71": 10.72458,
- "72": 10.7195,
- "73": 10.71167,
- "74": 10.704,
- "75": 10.68533,
- "76": 10.69498,
- "77": 10.69053,
+ "28": 10.89072,
+ "29": 10.87529,
+ "30": 10.88012,
+ "31": 10.8727,
+ "32": 10.87838,
+ "33": 10.86974,
+ "34": 10.86841,
+ "35": 10.85917,
+ "36": 10.86175,
+ "37": 10.85541,
+ "38": 10.85717,
+ "39": 10.849,
+ "40": 10.86294,
+ "41": 10.85311,
+ "42": 10.84765,
+ "43": 10.84575,
+ "44": 10.83781,
+ "45": 10.84929,
+ "46": 10.83794,
+ "47": 10.83823,
+ "48": 10.83113,
+ "49": 10.82968,
+ "50": 10.82226,
+ "51": 10.82165,
+ "52": 10.82088,
+ "53": 10.8066,
+ "54": 10.81086,
+ "55": 10.79429,
+ "56": 10.79986,
+ "57": 10.78953,
+ "58": 10.7985,
+ "59": 10.78091,
+ "60": 10.77519,
+ "61": 10.77652,
+ "62": 10.76107,
+ "63": 10.78407,
+ "64": 10.75476,
+ "65": 10.75019,
+ "66": 10.75694,
+ "67": 10.73504,
+ "68": 10.72867,
+ "69": 10.72583,
+ "70": 10.72571,
+ "71": 10.72463,
+ "72": 10.71967,
+ "73": 10.71173,
+ "74": 10.70397,
+ "75": 10.68536,
+ "76": 10.6951,
+ "77": 10.69081,
"78": 10.68213,
- "79": 10.6697,
- "80": 10.67692,
- "81": 10.66916,
- "82": 10.65024,
- "83": 10.62678,
- "84": 10.61021,
- "85": 10.6026,
- "86": 10.64309,
- "87": 10.63639,
- "88": 10.63081,
- "89": 10.59534,
- "90": 10.58433,
- "91": 10.60787,
- "92": 10.58304,
- "93": 10.56199,
- "94": 10.59372,
- "95": 10.57621,
- "96": 10.57236,
- "97": 10.55407,
- "98": 10.5595,
- "99": 10.55809,
- "100": 10.5283
+ "79": 10.66966,
+ "80": 10.677,
+ "81": 10.66877,
+ "82": 10.6502,
+ "83": 10.62666,
+ "84": 10.61061,
+ "85": 10.60269,
+ "86": 10.64312,
+ "87": 10.63649,
+ "88": 10.63061,
+ "89": 10.59523,
+ "90": 10.58422,
+ "91": 10.60753,
+ "92": 10.58273,
+ "93": 10.56197,
+ "94": 10.59313,
+ "95": 10.57585,
+ "96": 10.57208,
+ "97": 10.55381,
+ "98": 10.55891,
+ "99": 10.55782,
+ "100": 10.52811
}
},
"iteration-time": {
@@ -540,105 +540,105 @@
"step_interval": 1,
"values": {
"1": "nan",
- "2": 27.18208,
- "3": 1.39529,
- "4": 3.6491,
- "5": 0.67179,
- "6": 0.67671,
- "7": 0.66994,
- "8": 0.66973,
- "9": 0.67108,
- "10": 0.67559,
- "11": 0.67217,
- "12": 0.67331,
- "13": 0.66954,
- "14": 0.67002,
- "15": 0.67239,
- "16": 0.76215,
- "17": 0.69839,
- "18": 0.68015,
- "19": 0.69381,
- "20": 0.68775,
- "21": 0.69137,
- "22": 0.68806,
- "23": 0.69976,
- "24": 0.77931,
- "25": 0.76553,
- "26": 0.68909,
- "27": 0.68374,
- "28": 0.68045,
- "29": 0.6771,
- "30": 0.67224,
- "31": 0.67362,
- "32": 0.67682,
- "33": 0.672,
- "34": 0.67674,
- "35": 0.67276,
- "36": 0.67257,
- "37": 0.67332,
- "38": 0.68112,
- "39": 0.67781,
- "40": 0.67515,
- "41": 0.67587,
- "42": 0.6733,
- "43": 0.67545,
- "44": 0.67335,
- "45": 0.68357,
- "46": 0.68261,
- "47": 0.68097,
- "48": 0.68067,
- "49": 0.68428,
- "50": 0.68183,
- "51": 0.69468,
- "52": 0.68108,
- "53": 0.683,
- "54": 0.68569,
- "55": 0.68183,
- "56": 0.68275,
- "57": 0.6821,
- "58": 0.68182,
- "59": 0.68538,
- "60": 0.68324,
- "61": 0.68519,
- "62": 0.68243,
- "63": 0.68308,
- "64": 0.69526,
- "65": 0.68084,
- "66": 0.68955,
- "67": 0.68442,
- "68": 0.68126,
- "69": 0.68341,
- "70": 0.68587,
- "71": 0.68166,
- "72": 0.68346,
- "73": 0.68477,
- "74": 0.6857,
- "75": 0.68228,
- "76": 0.68263,
- "77": 0.67013,
- "78": 0.66937,
- "79": 0.66958,
- "80": 0.66944,
- "81": 0.67111,
- "82": 0.67321,
- "83": 0.66983,
- "84": 0.67414,
- "85": 0.67114,
- "86": 0.67054,
- "87": 0.66936,
- "88": 0.66939,
- "89": 0.66786,
- "90": 0.66981,
- "91": 0.66651,
- "92": 0.67627,
- "93": 0.68747,
- "94": 0.67136,
- "95": 0.67193,
- "96": 0.67111,
- "97": 0.66996,
- "98": 0.68055,
- "99": 0.6806,
- "100": 0.67843
+ "2": 27.91418,
+ "3": 1.8444,
+ "4": 4.36938,
+ "5": 0.9997,
+ "6": 1.00434,
+ "7": 0.99907,
+ "8": 1.00275,
+ "9": 0.99461,
+ "10": 0.99275,
+ "11": 0.97843,
+ "12": 0.98765,
+ "13": 0.9903,
+ "14": 1.00077,
+ "15": 1.0,
+ "16": 0.98823,
+ "17": 0.98199,
+ "18": 0.9877,
+ "19": 0.98886,
+ "20": 0.99983,
+ "21": 0.98962,
+ "22": 0.99635,
+ "23": 0.96454,
+ "24": 0.93898,
+ "25": 0.96491,
+ "26": 0.98141,
+ "27": 0.95293,
+ "28": 0.95301,
+ "29": 0.94879,
+ "30": 0.98802,
+ "31": 0.98495,
+ "32": 0.99868,
+ "33": 0.98867,
+ "34": 0.99852,
+ "35": 1.00176,
+ "36": 0.99104,
+ "37": 0.99448,
+ "38": 0.99426,
+ "39": 0.9992,
+ "40": 0.99262,
+ "41": 0.99458,
+ "42": 0.99928,
+ "43": 0.99527,
+ "44": 0.99574,
+ "45": 0.99947,
+ "46": 1.00027,
+ "47": 0.99784,
+ "48": 0.99625,
+ "49": 1.0035,
+ "50": 1.01101,
+ "51": 1.07145,
+ "52": 0.99811,
+ "53": 1.0032,
+ "54": 0.9989,
+ "55": 0.99747,
+ "56": 0.99838,
+ "57": 1.01334,
+ "58": 0.99915,
+ "59": 0.98789,
+ "60": 0.98956,
+ "61": 0.99815,
+ "62": 0.99777,
+ "63": 0.99925,
+ "64": 1.01132,
+ "65": 0.99811,
+ "66": 0.99272,
+ "67": 0.99779,
+ "68": 1.00819,
+ "69": 1.00743,
+ "70": 1.0067,
+ "71": 0.99506,
+ "72": 1.00152,
+ "73": 0.99805,
+ "74": 1.00601,
+ "75": 1.00578,
+ "76": 1.0047,
+ "77": 1.00041,
+ "78": 1.01124,
+ "79": 1.00206,
+ "80": 1.00293,
+ "81": 1.00212,
+ "82": 1.00771,
+ "83": 0.99432,
+ "84": 0.99804,
+ "85": 1.03671,
+ "86": 0.99424,
+ "87": 0.99658,
+ "88": 0.99251,
+ "89": 0.99387,
+ "90": 0.99986,
+ "91": 0.99704,
+ "92": 1.00218,
+ "93": 1.00685,
+ "94": 1.00914,
+ "95": 0.99614,
+ "96": 0.99789,
+ "97": 1.00812,
+ "98": 1.00062,
+ "99": 0.99776,
+ "100": 0.99241
}
}
}
\ No newline at end of file
diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgxh100_coreweave.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgxh100_coreweave.json
new file mode 100644
index 00000000000..309b2533461
--- /dev/null
+++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgxh100_coreweave.json
@@ -0,0 +1,644 @@
+{
+ "lm loss": {
+ "start_step": 1,
+ "end_step": 100,
+ "step_interval": 1,
+ "values": {
+ "1": 10.93663,
+ "2": 10.9327,
+ "3": 10.94263,
+ "4": 10.94969,
+ "5": 10.95052,
+ "6": 10.94157,
+ "7": 10.94484,
+ "8": 10.93674,
+ "9": 10.94996,
+ "10": 10.93686,
+ "11": 10.94102,
+ "12": 10.93763,
+ "13": 10.9235,
+ "14": 10.93428,
+ "15": 10.88791,
+ "16": 10.87434,
+ "17": 10.86896,
+ "18": 10.86065,
+ "19": 10.86311,
+ "20": 10.78063,
+ "21": 10.73125,
+ "22": 10.60283,
+ "23": 10.73278,
+ "24": 10.61888,
+ "25": 10.55212,
+ "26": 10.62704,
+ "27": 10.6391,
+ "28": 10.5908,
+ "29": 10.59809,
+ "30": 10.37777,
+ "31": 10.1201,
+ "32": 10.46078,
+ "33": 10.45538,
+ "34": 10.20107,
+ "35": 10.25779,
+ "36": 10.20889,
+ "37": 10.33688,
+ "38": 10.16827,
+ "39": 10.40875,
+ "40": 10.05239,
+ "41": 10.09432,
+ "42": 10.17894,
+ "43": 9.74205,
+ "44": 9.8904,
+ "45": 9.74009,
+ "46": 9.72707,
+ "47": 10.09139,
+ "48": 9.75298,
+ "49": 9.40106,
+ "50": 9.83667,
+ "51": 9.77071,
+ "52": 9.65705,
+ "53": 10.03051,
+ "54": 9.87899,
+ "55": 9.79604,
+ "56": 9.52924,
+ "57": 9.36583,
+ "58": 9.75331,
+ "59": 9.48065,
+ "60": 9.40785,
+ "61": 9.60145,
+ "62": 9.90753,
+ "63": 9.2583,
+ "64": 9.68397,
+ "65": 8.80003,
+ "66": 9.60779,
+ "67": 9.25408,
+ "68": 9.71438,
+ "69": 9.71682,
+ "70": 9.6617,
+ "71": 9.52466,
+ "72": 9.47116,
+ "73": 9.38822,
+ "74": 8.80223,
+ "75": 9.33966,
+ "76": 8.93574,
+ "77": 9.99333,
+ "78": 9.64731,
+ "79": 9.28114,
+ "80": 9.29588,
+ "81": 9.39589,
+ "82": 9.60893,
+ "83": 9.21629,
+ "84": 9.33891,
+ "85": 9.52979,
+ "86": 8.95817,
+ "87": 9.51641,
+ "88": 9.68228,
+ "89": 9.50664,
+ "90": 9.75348,
+ "91": 9.23465,
+ "92": 9.25972,
+ "93": 8.94517,
+ "94": 8.69188,
+ "95": 9.44591,
+ "96": 9.4101,
+ "97": 9.20087,
+ "98": 9.58175,
+ "99": 8.75818,
+ "100": 9.29466
+ }
+ },
+ "num-zeros": {
+ "start_step": 1,
+ "end_step": 100,
+ "step_interval": 1,
+ "values": {
+ "1": 22750260.0,
+ "2": 22953110.0,
+ "3": 22604450.0,
+ "4": 23266322.0,
+ "5": 22735560.0,
+ "6": 23061920.0,
+ "7": 22793342.0,
+ "8": 22960820.0,
+ "9": 22865664.0,
+ "10": 22950364.0,
+ "11": 22499674.0,
+ "12": 22456088.0,
+ "13": 22948060.0,
+ "14": 22384512.0,
+ "15": 22846272.0,
+ "16": 22856858.0,
+ "17": 22836412.0,
+ "18": 22590058.0,
+ "19": 22627048.0,
+ "20": 22712308.0,
+ "21": 22762624.0,
+ "22": 22816888.0,
+ "23": 22545124.0,
+ "24": 22794440.0,
+ "25": 22841936.0,
+ "26": 22549680.0,
+ "27": 22464820.0,
+ "28": 22453684.0,
+ "29": 22534640.0,
+ "30": 22636152.0,
+ "31": 22989488.0,
+ "32": 22594070.0,
+ "33": 22566010.0,
+ "34": 22855504.0,
+ "35": 22813688.0,
+ "36": 22595396.0,
+ "37": 22499360.0,
+ "38": 22926126.0,
+ "39": 22825392.0,
+ "40": 22675666.0,
+ "41": 22671586.0,
+ "42": 22682140.0,
+ "43": 23013940.0,
+ "44": 22764458.0,
+ "45": 22678992.0,
+ "46": 22915276.0,
+ "47": 22642868.0,
+ "48": 22954190.0,
+ "49": 23786668.0,
+ "50": 22934008.0,
+ "51": 23866222.0,
+ "52": 23807290.0,
+ "53": 24007532.0,
+ "54": 22871610.0,
+ "55": 23571284.0,
+ "56": 23954310.0,
+ "57": 24211632.0,
+ "58": 23914404.0,
+ "59": 23771838.0,
+ "60": 23813560.0,
+ "61": 23797288.0,
+ "62": 23739984.0,
+ "63": 23916692.0,
+ "64": 23895952.0,
+ "65": 24150562.0,
+ "66": 23796504.0,
+ "67": 25032232.0,
+ "68": 23673188.0,
+ "69": 23648580.0,
+ "70": 23903504.0,
+ "71": 24864636.0,
+ "72": 24767108.0,
+ "73": 24850612.0,
+ "74": 24132990.0,
+ "75": 24146528.0,
+ "76": 25025540.0,
+ "77": 24358472.0,
+ "78": 24910064.0,
+ "79": 23810516.0,
+ "80": 24821440.0,
+ "81": 25020512.0,
+ "82": 23851244.0,
+ "83": 24961024.0,
+ "84": 25144020.0,
+ "85": 24823608.0,
+ "86": 23153096.0,
+ "87": 24850204.0,
+ "88": 24749150.0,
+ "89": 22505554.0,
+ "90": 24059620.0,
+ "91": 23839038.0,
+ "92": 23874568.0,
+ "93": 24769548.0,
+ "94": 23992452.0,
+ "95": 25189838.0,
+ "96": 23909262.0,
+ "97": 24713068.0,
+ "98": 23832506.0,
+ "99": 23983474.0,
+ "100": 24101108.0
+ }
+ },
+ "mem-allocated-bytes": {
+ "start_step": 1,
+ "end_step": 100,
+ "step_interval": 1,
+ "values": {
+ "1": 763142656.0,
+ "2": 778734592.0,
+ "3": 772525056.0,
+ "4": 803593216.0,
+ "5": 803593216.0,
+ "6": 803593216.0,
+ "7": 801299456.0,
+ "8": 803593216.0,
+ "9": 801840128.0,
+ "10": 803593216.0,
+ "11": 802987008.0,
+ "12": 803593216.0,
+ "13": 802987008.0,
+ "14": 801299456.0,
+ "15": 803593216.0,
+ "16": 801840128.0,
+ "17": 803593216.0,
+ "18": 802987008.0,
+ "19": 801299456.0,
+ "20": 803593216.0,
+ "21": 801299456.0,
+ "22": 803593216.0,
+ "23": 801299456.0,
+ "24": 803593216.0,
+ "25": 801299456.0,
+ "26": 803593216.0,
+ "27": 801299456.0,
+ "28": 803593216.0,
+ "29": 801299456.0,
+ "30": 803593216.0,
+ "31": 801299456.0,
+ "32": 803593216.0,
+ "33": 801840128.0,
+ "34": 803593216.0,
+ "35": 801840128.0,
+ "36": 803593216.0,
+ "37": 802987008.0,
+ "38": 801299456.0,
+ "39": 803593216.0,
+ "40": 801299456.0,
+ "41": 803593216.0,
+ "42": 801840128.0,
+ "43": 803593216.0,
+ "44": 801840128.0,
+ "45": 803593216.0,
+ "46": 801840128.0,
+ "47": 803593216.0,
+ "48": 801840128.0,
+ "49": 803593216.0,
+ "50": 801840128.0,
+ "51": 801299456.0,
+ "52": 803593216.0,
+ "53": 801299456.0,
+ "54": 803593216.0,
+ "55": 801840128.0,
+ "56": 803593216.0,
+ "57": 801840128.0,
+ "58": 803593216.0,
+ "59": 801840128.0,
+ "60": 803593216.0,
+ "61": 801299456.0,
+ "62": 803593216.0,
+ "63": 801299456.0,
+ "64": 802987008.0,
+ "65": 803593216.0,
+ "66": 801299456.0,
+ "67": 803593216.0,
+ "68": 801299456.0,
+ "69": 803593216.0,
+ "70": 801840128.0,
+ "71": 803593216.0,
+ "72": 801299456.0,
+ "73": 803593216.0,
+ "74": 803593216.0,
+ "75": 802987008.0,
+ "76": 803593216.0,
+ "77": 801840128.0,
+ "78": 803593216.0,
+ "79": 801299456.0,
+ "80": 802987008.0,
+ "81": 803593216.0,
+ "82": 801840128.0,
+ "83": 803593216.0,
+ "84": 801299456.0,
+ "85": 802987008.0,
+ "86": 803593216.0,
+ "87": 801840128.0,
+ "88": 803593216.0,
+ "89": 801299456.0,
+ "90": 802987008.0,
+ "91": 803593216.0,
+ "92": 801299456.0,
+ "93": 803593216.0,
+ "94": 801299456.0,
+ "95": 803593216.0,
+ "96": 801299456.0,
+ "97": 803593216.0,
+ "98": 801299456.0,
+ "99": 802987008.0,
+ "100": 803593216.0
+ }
+ },
+ "mem-max-allocated-bytes": {
+ "start_step": 1,
+ "end_step": 100,
+ "step_interval": 1,
+ "values": {
+ "1": 993582592.0,
+ "2": 1210942464.0,
+ "3": 1210942464.0,
+ "4": 1210942464.0,
+ "5": 1210942464.0,
+ "6": 1210942464.0,
+ "7": 1210942464.0,
+ "8": 1210942464.0,
+ "9": 1210942464.0,
+ "10": 1210942464.0,
+ "11": 1210942464.0,
+ "12": 1210942464.0,
+ "13": 1210942464.0,
+ "14": 1210942464.0,
+ "15": 1210942464.0,
+ "16": 1210942464.0,
+ "17": 1210942464.0,
+ "18": 1210942464.0,
+ "19": 1210942464.0,
+ "20": 1210942464.0,
+ "21": 1210942464.0,
+ "22": 1210942464.0,
+ "23": 1210942464.0,
+ "24": 1210942464.0,
+ "25": 1210942464.0,
+ "26": 1210942464.0,
+ "27": 1210942464.0,
+ "28": 1210942464.0,
+ "29": 1210942464.0,
+ "30": 1210942464.0,
+ "31": 1210942464.0,
+ "32": 1210942464.0,
+ "33": 1210942464.0,
+ "34": 1210942464.0,
+ "35": 1210942464.0,
+ "36": 1210942464.0,
+ "37": 1210942464.0,
+ "38": 1210942464.0,
+ "39": 1210942464.0,
+ "40": 1210942464.0,
+ "41": 1210942464.0,
+ "42": 1210942464.0,
+ "43": 1210942464.0,
+ "44": 1210942464.0,
+ "45": 1210942464.0,
+ "46": 1210942464.0,
+ "47": 1210942464.0,
+ "48": 1210942464.0,
+ "49": 1210942464.0,
+ "50": 1210942464.0,
+ "51": 1210942464.0,
+ "52": 1210942464.0,
+ "53": 1210942464.0,
+ "54": 1210942464.0,
+ "55": 1210942464.0,
+ "56": 1210942464.0,
+ "57": 1210942464.0,
+ "58": 1210942464.0,
+ "59": 1210942464.0,
+ "60": 1210942464.0,
+ "61": 1210942464.0,
+ "62": 1210942464.0,
+ "63": 1210942464.0,
+ "64": 1210942464.0,
+ "65": 1210942464.0,
+ "66": 1210942464.0,
+ "67": 1210942464.0,
+ "68": 1210942464.0,
+ "69": 1210942464.0,
+ "70": 1210942464.0,
+ "71": 1210942464.0,
+ "72": 1210942464.0,
+ "73": 1210942464.0,
+ "74": 1210942464.0,
+ "75": 1210942464.0,
+ "76": 1210942464.0,
+ "77": 1210942464.0,
+ "78": 1210942464.0,
+ "79": 1210942464.0,
+ "80": 1210942464.0,
+ "81": 1210942464.0,
+ "82": 1210942464.0,
+ "83": 1210942464.0,
+ "84": 1210942464.0,
+ "85": 1210942464.0,
+ "86": 1210942464.0,
+ "87": 1210942464.0,
+ "88": 1210942464.0,
+ "89": 1210942464.0,
+ "90": 1210942464.0,
+ "91": 1210942464.0,
+ "92": 1210942464.0,
+ "93": 1210942464.0,
+ "94": 1210942464.0,
+ "95": 1210942464.0,
+ "96": 1210942464.0,
+ "97": 1210942464.0,
+ "98": 1210942464.0,
+ "99": 1210942464.0,
+ "100": 1210942464.0
+ }
+ },
+ "mtp_1 loss": {
+ "start_step": 1,
+ "end_step": 100,
+ "step_interval": 1,
+ "values": {
+ "1": 10.88689,
+ "2": 10.90485,
+ "3": 10.90869,
+ "4": 10.86903,
+ "5": 10.91601,
+ "6": 10.906,
+ "7": 10.90268,
+ "8": 10.88984,
+ "9": 10.90425,
+ "10": 10.89144,
+ "11": 10.93384,
+ "12": 10.91647,
+ "13": 10.91108,
+ "14": 10.91974,
+ "15": 10.88488,
+ "16": 10.9077,
+ "17": 10.87571,
+ "18": 10.91379,
+ "19": 10.9092,
+ "20": 10.87837,
+ "21": 10.87896,
+ "22": 10.85583,
+ "23": 10.88007,
+ "24": 10.87245,
+ "25": 10.85859,
+ "26": 10.8696,
+ "27": 10.87702,
+ "28": 10.88641,
+ "29": 10.88866,
+ "30": 10.85422,
+ "31": 10.79713,
+ "32": 10.86631,
+ "33": 10.8781,
+ "34": 10.83982,
+ "35": 10.84165,
+ "36": 10.85012,
+ "37": 10.85556,
+ "38": 10.83674,
+ "39": 10.86355,
+ "40": 10.82887,
+ "41": 10.8341,
+ "42": 10.84469,
+ "43": 10.78828,
+ "44": 10.82123,
+ "45": 10.78831,
+ "46": 10.7823,
+ "47": 10.82898,
+ "48": 10.78985,
+ "49": 10.71269,
+ "50": 10.77382,
+ "51": 10.76639,
+ "52": 10.7397,
+ "53": 10.80285,
+ "54": 10.77365,
+ "55": 10.76066,
+ "56": 10.71068,
+ "57": 10.66686,
+ "58": 10.74378,
+ "59": 10.69209,
+ "60": 10.66474,
+ "61": 10.7073,
+ "62": 10.77206,
+ "63": 10.61812,
+ "64": 10.7178,
+ "65": 10.49439,
+ "66": 10.67106,
+ "67": 10.57534,
+ "68": 10.6873,
+ "69": 10.6816,
+ "70": 10.66836,
+ "71": 10.64586,
+ "72": 10.60925,
+ "73": 10.56508,
+ "74": 10.37144,
+ "75": 10.51183,
+ "76": 10.39914,
+ "77": 10.75182,
+ "78": 10.6268,
+ "79": 10.46827,
+ "80": 10.47524,
+ "81": 10.51083,
+ "82": 10.58769,
+ "83": 10.4381,
+ "84": 10.45057,
+ "85": 10.55084,
+ "86": 10.28076,
+ "87": 10.51088,
+ "88": 10.60323,
+ "89": 10.50794,
+ "90": 10.60274,
+ "91": 10.38238,
+ "92": 10.38703,
+ "93": 10.23076,
+ "94": 10.08438,
+ "95": 10.42616,
+ "96": 10.44905,
+ "97": 10.32215,
+ "98": 10.4966,
+ "99": 10.04765,
+ "100": 10.33491
+ }
+ },
+ "iteration-time": {
+ "start_step": 1,
+ "end_step": 100,
+ "step_interval": 1,
+ "values": {
+ "1": 51.30209,
+ "2": 1.41746,
+ "3": 1.28029,
+ "4": 10.57024,
+ "5": 0.66643,
+ "6": 0.67893,
+ "7": 0.65727,
+ "8": 0.66196,
+ "9": 0.66227,
+ "10": 0.65877,
+ "11": 0.65828,
+ "12": 0.65862,
+ "13": 0.65727,
+ "14": 0.65896,
+ "15": 0.65851,
+ "16": 0.66826,
+ "17": 0.65878,
+ "18": 0.65573,
+ "19": 0.65631,
+ "20": 0.65579,
+ "21": 0.65091,
+ "22": 0.65603,
+ "23": 0.65158,
+ "24": 0.65266,
+ "25": 0.65816,
+ "26": 0.65194,
+ "27": 0.6541,
+ "28": 0.65515,
+ "29": 0.65439,
+ "30": 0.65241,
+ "31": 0.65597,
+ "32": 0.65551,
+ "33": 0.65318,
+ "34": 0.6553,
+ "35": 0.65725,
+ "36": 0.65926,
+ "37": 0.65606,
+ "38": 0.65571,
+ "39": 0.65846,
+ "40": 0.65642,
+ "41": 0.65509,
+ "42": 0.66105,
+ "43": 0.65448,
+ "44": 0.65534,
+ "45": 0.65304,
+ "46": 0.65227,
+ "47": 0.64871,
+ "48": 0.65257,
+ "49": 0.65485,
+ "50": 0.65054,
+ "51": 0.67883,
+ "52": 0.6571,
+ "53": 0.65671,
+ "54": 0.65877,
+ "55": 0.65584,
+ "56": 0.65072,
+ "57": 0.64951,
+ "58": 0.65703,
+ "59": 0.65106,
+ "60": 0.64536,
+ "61": 0.64416,
+ "62": 0.64816,
+ "63": 0.64084,
+ "64": 0.6396,
+ "65": 0.64182,
+ "66": 0.64004,
+ "67": 0.64101,
+ "68": 0.63928,
+ "69": 0.65723,
+ "70": 0.6828,
+ "71": 0.64052,
+ "72": 0.64287,
+ "73": 0.64136,
+ "74": 0.64252,
+ "75": 0.64617,
+ "76": 0.64857,
+ "77": 0.64304,
+ "78": 0.64068,
+ "79": 0.64048,
+ "80": 0.64091,
+ "81": 0.64179,
+ "82": 0.64793,
+ "83": 0.641,
+ "84": 0.64077,
+ "85": 0.64011,
+ "86": 0.64018,
+ "87": 0.64132,
+ "88": 0.63901,
+ "89": 0.6407,
+ "90": 0.64277,
+ "91": 0.64132,
+ "92": 0.64123,
+ "93": 0.65051,
+ "94": 0.65036,
+ "95": 0.64542,
+ "96": 0.64561,
+ "97": 0.6504,
+ "98": 0.64563,
+ "99": 0.64524,
+ "100": 0.65049
+ }
+ }
+}
\ No newline at end of file
diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgxh100_eos.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgxh100_eos.json
new file mode 100644
index 00000000000..e8c2bae571f
--- /dev/null
+++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgxh100_eos.json
@@ -0,0 +1,644 @@
+{
+ "lm loss": {
+ "start_step": 1,
+ "end_step": 100,
+ "step_interval": 1,
+ "values": {
+ "1": 10.93663,
+ "2": 10.9327,
+ "3": 10.94263,
+ "4": 10.94969,
+ "5": 10.95052,
+ "6": 10.94157,
+ "7": 10.94484,
+ "8": 10.93674,
+ "9": 10.94996,
+ "10": 10.93686,
+ "11": 10.94102,
+ "12": 10.93763,
+ "13": 10.9235,
+ "14": 10.93428,
+ "15": 10.88791,
+ "16": 10.87434,
+ "17": 10.86896,
+ "18": 10.86065,
+ "19": 10.86311,
+ "20": 10.78063,
+ "21": 10.73125,
+ "22": 10.60283,
+ "23": 10.73278,
+ "24": 10.61888,
+ "25": 10.55212,
+ "26": 10.62704,
+ "27": 10.6391,
+ "28": 10.5908,
+ "29": 10.59809,
+ "30": 10.37777,
+ "31": 10.1201,
+ "32": 10.46078,
+ "33": 10.45538,
+ "34": 10.20107,
+ "35": 10.25779,
+ "36": 10.20889,
+ "37": 10.33688,
+ "38": 10.16827,
+ "39": 10.40875,
+ "40": 10.05239,
+ "41": 10.09432,
+ "42": 10.17894,
+ "43": 9.74205,
+ "44": 9.8904,
+ "45": 9.74009,
+ "46": 9.72707,
+ "47": 10.09139,
+ "48": 9.75298,
+ "49": 9.40106,
+ "50": 9.83667,
+ "51": 9.77071,
+ "52": 9.65705,
+ "53": 10.03051,
+ "54": 9.87899,
+ "55": 9.79604,
+ "56": 9.52924,
+ "57": 9.36583,
+ "58": 9.75331,
+ "59": 9.48065,
+ "60": 9.40785,
+ "61": 9.60145,
+ "62": 9.90753,
+ "63": 9.2583,
+ "64": 9.68397,
+ "65": 8.80003,
+ "66": 9.60779,
+ "67": 9.25408,
+ "68": 9.71438,
+ "69": 9.71682,
+ "70": 9.6617,
+ "71": 9.52466,
+ "72": 9.47116,
+ "73": 9.38822,
+ "74": 8.80223,
+ "75": 9.33966,
+ "76": 8.93574,
+ "77": 9.99333,
+ "78": 9.64731,
+ "79": 9.28114,
+ "80": 9.29588,
+ "81": 9.39589,
+ "82": 9.60893,
+ "83": 9.21629,
+ "84": 9.33891,
+ "85": 9.52979,
+ "86": 8.95817,
+ "87": 9.51641,
+ "88": 9.68228,
+ "89": 9.50664,
+ "90": 9.75348,
+ "91": 9.23465,
+ "92": 9.25972,
+ "93": 8.94517,
+ "94": 8.69188,
+ "95": 9.44591,
+ "96": 9.4101,
+ "97": 9.20087,
+ "98": 9.58175,
+ "99": 8.75818,
+ "100": 9.29466
+ }
+ },
+ "num-zeros": {
+ "start_step": 1,
+ "end_step": 100,
+ "step_interval": 1,
+ "values": {
+ "1": 22750260.0,
+ "2": 22953110.0,
+ "3": 22604450.0,
+ "4": 23266322.0,
+ "5": 22735560.0,
+ "6": 23061920.0,
+ "7": 22793342.0,
+ "8": 22960820.0,
+ "9": 22865664.0,
+ "10": 22950364.0,
+ "11": 22499674.0,
+ "12": 22456088.0,
+ "13": 22948060.0,
+ "14": 22384512.0,
+ "15": 22846272.0,
+ "16": 22856858.0,
+ "17": 22836412.0,
+ "18": 22590058.0,
+ "19": 22627048.0,
+ "20": 22712308.0,
+ "21": 22762624.0,
+ "22": 22816888.0,
+ "23": 22545124.0,
+ "24": 22794440.0,
+ "25": 22841936.0,
+ "26": 22549680.0,
+ "27": 22464820.0,
+ "28": 22453684.0,
+ "29": 22534640.0,
+ "30": 22636152.0,
+ "31": 22989488.0,
+ "32": 22594070.0,
+ "33": 22566010.0,
+ "34": 22855504.0,
+ "35": 22813688.0,
+ "36": 22595396.0,
+ "37": 22499360.0,
+ "38": 22926126.0,
+ "39": 22825392.0,
+ "40": 22675666.0,
+ "41": 22671586.0,
+ "42": 22682140.0,
+ "43": 23013940.0,
+ "44": 22764458.0,
+ "45": 22678992.0,
+ "46": 22915276.0,
+ "47": 22642868.0,
+ "48": 22954190.0,
+ "49": 23786668.0,
+ "50": 22934008.0,
+ "51": 23866222.0,
+ "52": 23807290.0,
+ "53": 24007532.0,
+ "54": 22871610.0,
+ "55": 23571284.0,
+ "56": 23954310.0,
+ "57": 24211632.0,
+ "58": 23914404.0,
+ "59": 23771838.0,
+ "60": 23813560.0,
+ "61": 23797288.0,
+ "62": 23739984.0,
+ "63": 23916692.0,
+ "64": 23895952.0,
+ "65": 24150562.0,
+ "66": 23796504.0,
+ "67": 25032232.0,
+ "68": 23673188.0,
+ "69": 23648580.0,
+ "70": 23903504.0,
+ "71": 24864636.0,
+ "72": 24767108.0,
+ "73": 24850612.0,
+ "74": 24132990.0,
+ "75": 24146528.0,
+ "76": 25025540.0,
+ "77": 24358472.0,
+ "78": 24910064.0,
+ "79": 23810516.0,
+ "80": 24821440.0,
+ "81": 25020512.0,
+ "82": 23851244.0,
+ "83": 24961024.0,
+ "84": 25144020.0,
+ "85": 24823608.0,
+ "86": 23153096.0,
+ "87": 24850204.0,
+ "88": 24749150.0,
+ "89": 22505554.0,
+ "90": 24059620.0,
+ "91": 23839038.0,
+ "92": 23874568.0,
+ "93": 24769548.0,
+ "94": 23992452.0,
+ "95": 25189838.0,
+ "96": 23909262.0,
+ "97": 24713068.0,
+ "98": 23832506.0,
+ "99": 23983474.0,
+ "100": 24101108.0
+ }
+ },
+ "mem-allocated-bytes": {
+ "start_step": 1,
+ "end_step": 100,
+ "step_interval": 1,
+ "values": {
+ "1": 769688064.0,
+ "2": 775359488.0,
+ "3": 769690624.0,
+ "4": 801299456.0,
+ "5": 803593216.0,
+ "6": 801299456.0,
+ "7": 803593216.0,
+ "8": 803593216.0,
+ "9": 801299456.0,
+ "10": 803593216.0,
+ "11": 801299456.0,
+ "12": 803593216.0,
+ "13": 801299456.0,
+ "14": 803593216.0,
+ "15": 803593216.0,
+ "16": 801299456.0,
+ "17": 803593216.0,
+ "18": 801299456.0,
+ "19": 803593216.0,
+ "20": 801299456.0,
+ "21": 803593216.0,
+ "22": 803593216.0,
+ "23": 801840128.0,
+ "24": 803593216.0,
+ "25": 802987008.0,
+ "26": 801299456.0,
+ "27": 802987008.0,
+ "28": 801299456.0,
+ "29": 801299456.0,
+ "30": 803593216.0,
+ "31": 801299456.0,
+ "32": 803593216.0,
+ "33": 801299456.0,
+ "34": 803593216.0,
+ "35": 801299456.0,
+ "36": 801299456.0,
+ "37": 803593216.0,
+ "38": 801299456.0,
+ "39": 803593216.0,
+ "40": 801299456.0,
+ "41": 803593216.0,
+ "42": 801299456.0,
+ "43": 801299456.0,
+ "44": 803593216.0,
+ "45": 802987008.0,
+ "46": 801299456.0,
+ "47": 803593216.0,
+ "48": 801299456.0,
+ "49": 803593216.0,
+ "50": 801299456.0,
+ "51": 801299456.0,
+ "52": 803593216.0,
+ "53": 802446336.0,
+ "54": 801299456.0,
+ "55": 803593216.0,
+ "56": 802987008.0,
+ "57": 801299456.0,
+ "58": 801840128.0,
+ "59": 801299456.0,
+ "60": 803593216.0,
+ "61": 801840128.0,
+ "62": 801299456.0,
+ "63": 803593216.0,
+ "64": 802446336.0,
+ "65": 803593216.0,
+ "66": 801840128.0,
+ "67": 801299456.0,
+ "68": 803593216.0,
+ "69": 801840128.0,
+ "70": 801299456.0,
+ "71": 803593216.0,
+ "72": 803593216.0,
+ "73": 802987008.0,
+ "74": 801299456.0,
+ "75": 803593216.0,
+ "76": 803593216.0,
+ "77": 801299456.0,
+ "78": 801299456.0,
+ "79": 803593216.0,
+ "80": 801840128.0,
+ "81": 801299456.0,
+ "82": 803593216.0,
+ "83": 801299456.0,
+ "84": 801299456.0,
+ "85": 803593216.0,
+ "86": 801299456.0,
+ "87": 801299456.0,
+ "88": 803593216.0,
+ "89": 801840128.0,
+ "90": 803593216.0,
+ "91": 802987008.0,
+ "92": 801299456.0,
+ "93": 803593216.0,
+ "94": 801299456.0,
+ "95": 801299456.0,
+ "96": 803593216.0,
+ "97": 801840128.0,
+ "98": 803593216.0,
+ "99": 802987008.0,
+ "100": 801299456.0
+ }
+ },
+ "mem-max-allocated-bytes": {
+ "start_step": 1,
+ "end_step": 100,
+ "step_interval": 1,
+ "values": {
+ "1": 988765184.0,
+ "2": 1206831616.0,
+ "3": 1210116096.0,
+ "4": 1210116096.0,
+ "5": 1210116096.0,
+ "6": 1210116096.0,
+ "7": 1210116096.0,
+ "8": 1210116096.0,
+ "9": 1210116096.0,
+ "10": 1210116096.0,
+ "11": 1210116096.0,
+ "12": 1210116096.0,
+ "13": 1210116096.0,
+ "14": 1210116096.0,
+ "15": 1210116096.0,
+ "16": 1210116096.0,
+ "17": 1210116096.0,
+ "18": 1210116096.0,
+ "19": 1210116096.0,
+ "20": 1210116096.0,
+ "21": 1210116096.0,
+ "22": 1210116096.0,
+ "23": 1210116096.0,
+ "24": 1210116096.0,
+ "25": 1210116096.0,
+ "26": 1210116096.0,
+ "27": 1210116096.0,
+ "28": 1210116096.0,
+ "29": 1210116096.0,
+ "30": 1210116096.0,
+ "31": 1210116096.0,
+ "32": 1210116096.0,
+ "33": 1210116096.0,
+ "34": 1210116096.0,
+ "35": 1210116096.0,
+ "36": 1210116096.0,
+ "37": 1210116096.0,
+ "38": 1210116096.0,
+ "39": 1210116096.0,
+ "40": 1210116096.0,
+ "41": 1210116096.0,
+ "42": 1210116096.0,
+ "43": 1210116096.0,
+ "44": 1210116096.0,
+ "45": 1210116096.0,
+ "46": 1210116096.0,
+ "47": 1210116096.0,
+ "48": 1210116096.0,
+ "49": 1210116096.0,
+ "50": 1210116096.0,
+ "51": 1210116096.0,
+ "52": 1210116096.0,
+ "53": 1210116096.0,
+ "54": 1210116096.0,
+ "55": 1210116096.0,
+ "56": 1210116096.0,
+ "57": 1210116096.0,
+ "58": 1210116096.0,
+ "59": 1210116096.0,
+ "60": 1210116096.0,
+ "61": 1210116096.0,
+ "62": 1210116096.0,
+ "63": 1210116096.0,
+ "64": 1210116096.0,
+ "65": 1210116096.0,
+ "66": 1210116096.0,
+ "67": 1210116096.0,
+ "68": 1210116096.0,
+ "69": 1210116096.0,
+ "70": 1210116096.0,
+ "71": 1210116096.0,
+ "72": 1210116096.0,
+ "73": 1210116096.0,
+ "74": 1210116096.0,
+ "75": 1210116096.0,
+ "76": 1210116096.0,
+ "77": 1210116096.0,
+ "78": 1210116096.0,
+ "79": 1210116096.0,
+ "80": 1210116096.0,
+ "81": 1210116096.0,
+ "82": 1210116096.0,
+ "83": 1210116096.0,
+ "84": 1210116096.0,
+ "85": 1210116096.0,
+ "86": 1210116096.0,
+ "87": 1210116096.0,
+ "88": 1210116096.0,
+ "89": 1210116096.0,
+ "90": 1210116096.0,
+ "91": 1210116096.0,
+ "92": 1210116096.0,
+ "93": 1210116096.0,
+ "94": 1210116096.0,
+ "95": 1210116096.0,
+ "96": 1210116096.0,
+ "97": 1210116096.0,
+ "98": 1210116096.0,
+ "99": 1210116096.0,
+ "100": 1210116096.0
+ }
+ },
+ "mtp_1 loss": {
+ "start_step": 1,
+ "end_step": 100,
+ "step_interval": 1,
+ "values": {
+ "1": 10.88689,
+ "2": 10.90485,
+ "3": 10.90869,
+ "4": 10.86903,
+ "5": 10.91601,
+ "6": 10.906,
+ "7": 10.90268,
+ "8": 10.88984,
+ "9": 10.90425,
+ "10": 10.89144,
+ "11": 10.93384,
+ "12": 10.91647,
+ "13": 10.91108,
+ "14": 10.91974,
+ "15": 10.88488,
+ "16": 10.9077,
+ "17": 10.87571,
+ "18": 10.91379,
+ "19": 10.9092,
+ "20": 10.87837,
+ "21": 10.87896,
+ "22": 10.85583,
+ "23": 10.88007,
+ "24": 10.87245,
+ "25": 10.85859,
+ "26": 10.8696,
+ "27": 10.87702,
+ "28": 10.88641,
+ "29": 10.88866,
+ "30": 10.85422,
+ "31": 10.79713,
+ "32": 10.86631,
+ "33": 10.8781,
+ "34": 10.83982,
+ "35": 10.84165,
+ "36": 10.85012,
+ "37": 10.85556,
+ "38": 10.83674,
+ "39": 10.86355,
+ "40": 10.82887,
+ "41": 10.8341,
+ "42": 10.84469,
+ "43": 10.78828,
+ "44": 10.82123,
+ "45": 10.78831,
+ "46": 10.7823,
+ "47": 10.82898,
+ "48": 10.78985,
+ "49": 10.71269,
+ "50": 10.77382,
+ "51": 10.76639,
+ "52": 10.7397,
+ "53": 10.80285,
+ "54": 10.77365,
+ "55": 10.76066,
+ "56": 10.71068,
+ "57": 10.66686,
+ "58": 10.74378,
+ "59": 10.69209,
+ "60": 10.66474,
+ "61": 10.7073,
+ "62": 10.77206,
+ "63": 10.61812,
+ "64": 10.7178,
+ "65": 10.49439,
+ "66": 10.67106,
+ "67": 10.57534,
+ "68": 10.6873,
+ "69": 10.6816,
+ "70": 10.66836,
+ "71": 10.64586,
+ "72": 10.60925,
+ "73": 10.56508,
+ "74": 10.37144,
+ "75": 10.51183,
+ "76": 10.39914,
+ "77": 10.75182,
+ "78": 10.6268,
+ "79": 10.46827,
+ "80": 10.47524,
+ "81": 10.51083,
+ "82": 10.58769,
+ "83": 10.4381,
+ "84": 10.45057,
+ "85": 10.55084,
+ "86": 10.28076,
+ "87": 10.51088,
+ "88": 10.60323,
+ "89": 10.50794,
+ "90": 10.60274,
+ "91": 10.38238,
+ "92": 10.38703,
+ "93": 10.23076,
+ "94": 10.08438,
+ "95": 10.42616,
+ "96": 10.44905,
+ "97": 10.32215,
+ "98": 10.4966,
+ "99": 10.04765,
+ "100": 10.33491
+ }
+ },
+ "iteration-time": {
+ "start_step": 1,
+ "end_step": 100,
+ "step_interval": 1,
+ "values": {
+ "1": 58.67467,
+ "2": 1.49483,
+ "3": 1.38721,
+ "4": 11.78499,
+ "5": 0.75759,
+ "6": 0.75678,
+ "7": 0.76144,
+ "8": 0.80382,
+ "9": 0.74706,
+ "10": 0.74893,
+ "11": 0.75091,
+ "12": 0.75087,
+ "13": 0.74803,
+ "14": 0.75316,
+ "15": 0.80396,
+ "16": 0.75267,
+ "17": 0.75378,
+ "18": 0.75457,
+ "19": 0.75484,
+ "20": 0.75428,
+ "21": 0.75639,
+ "22": 0.81363,
+ "23": 0.75607,
+ "24": 0.75553,
+ "25": 0.75564,
+ "26": 0.75334,
+ "27": 0.75722,
+ "28": 0.76027,
+ "29": 0.8113,
+ "30": 0.75278,
+ "31": 0.75471,
+ "32": 0.75104,
+ "33": 0.75271,
+ "34": 0.74877,
+ "35": 0.74765,
+ "36": 0.80549,
+ "37": 0.75089,
+ "38": 0.75395,
+ "39": 0.75254,
+ "40": 0.76025,
+ "41": 0.75356,
+ "42": 0.75573,
+ "43": 0.79632,
+ "44": 0.77927,
+ "45": 0.75515,
+ "46": 0.75759,
+ "47": 0.75978,
+ "48": 0.75749,
+ "49": 0.75504,
+ "50": 0.75616,
+ "51": 0.77974,
+ "52": 0.76581,
+ "53": 0.76997,
+ "54": 0.76705,
+ "55": 0.76737,
+ "56": 0.77352,
+ "57": 0.77833,
+ "58": 0.81195,
+ "59": 0.77251,
+ "60": 0.7711,
+ "61": 0.77181,
+ "62": 0.77006,
+ "63": 0.76957,
+ "64": 0.77251,
+ "65": 0.82259,
+ "66": 0.77112,
+ "67": 0.7683,
+ "68": 0.77335,
+ "69": 0.77022,
+ "70": 0.77335,
+ "71": 0.77822,
+ "72": 0.77769,
+ "73": 0.79476,
+ "74": 0.7728,
+ "75": 0.7711,
+ "76": 0.76863,
+ "77": 0.77228,
+ "78": 0.77031,
+ "79": 0.76995,
+ "80": 0.77286,
+ "81": 0.76616,
+ "82": 0.76752,
+ "83": 0.76583,
+ "84": 0.77264,
+ "85": 0.76732,
+ "86": 0.76873,
+ "87": 0.77239,
+ "88": 0.77971,
+ "89": 0.76112,
+ "90": 0.76225,
+ "91": 0.75814,
+ "92": 0.76144,
+ "93": 0.75796,
+ "94": 0.76412,
+ "95": 0.777,
+ "96": 0.77207,
+ "97": 0.7628,
+ "98": 0.76325,
+ "99": 0.76204,
+ "100": 0.7668
+ }
+ }
+}
\ No newline at end of file
diff --git a/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp1pp1ep8/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp1pp1ep8/golden_values_dev_dgx_h100.json
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4/golden_values_dev_dgx_h100.json
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/functional_tests/test_cases/moe2.0/model_configs/dsv3_proxy.yaml b/tests/functional_tests/test_cases/moe2.0/model_configs/dsv3_proxy.yaml
new file mode 100644
index 00000000000..70924aed0cc
--- /dev/null
+++ b/tests/functional_tests/test_cases/moe2.0/model_configs/dsv3_proxy.yaml
@@ -0,0 +1,85 @@
+MODEL_ARGS:
+ # Data args
+ --seq-length: 4096
+ --data-cache-path: ${DATA_CACHE_PATH}
+ --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
+ --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
+ --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
+ --split: 949,50,1
+ # Add transformer base args
+ --num-layers: 16
+ --hidden-size: 1024
+ --normalization: RMSNorm
+ --norm-epsilon: 1e-6
+ --disable-bias-linear: true
+ --max-position-embeddings: 4096
+ --make-vocab-size-divisible-by: 3232
+ --untie-embeddings-and-output-weights: true
+ # Add attention related args
+ --multi-latent-attention: true
+ --num-attention-heads: 32
+ --kv-channels: 128
+ --qk-layernorm: true
+ --position-embedding-type: rope
+ --rotary-base: 10000
+ --q-lora-rank: 1536
+ --kv-lora-rank: 512
+ --qk-head-dim: 128
+ --qk-pos-emb-head-dim: 64
+ --v-head-dim: 128
+ --rotary-scaling-factor: 40
+ --mscale: 1.0
+ --mscale-all-dim: 1.0
+ # Add MLP related args
+ --swiglu: true
+ --ffn-hidden-size: 4096
+ # Add MoE args
+ --num-experts: 32
+ --moe-layer-freq: ([0]*1+[1]*15)
+ --moe-ffn-hidden-size: 1024
+ --moe-shared-expert-intermediate-size: 1024
+ --moe-router-load-balancing-type: seq_aux_loss
+ --moe-router-topk: 4
+ --moe-router-pre-softmax: true
+ --moe-grouped-gemm: true
+ --moe-aux-loss-coeff: 1e-4
+ --moe-router-group-topk: 2
+ --moe-router-num-groups: 4
+ --moe-router-topk-scaling-factor: 2.0
+ --moe-router-score-function: sigmoid
+ --moe-router-enable-expert-bias: true
+ --moe-router-bias-update-rate: 1e-3
+ --moe-router-dtype: fp32
+ # Comment out the following MTP args to disable MTP
+ --mtp-num-layers: 1
+ --mtp-loss-scaling-factor: 0.1
+ # Add regularization args
+ --attention-dropout: 0.0
+ --hidden-dropout: 0.0
+ --clip-grad: 1.0
+ --weight-decay: 0.1
+ # Add learning rate args
+ --lr-warmup-fraction: .01
+ --lr: 0.00015
+ --min-lr: 1.0e-5
+ --lr-decay-style: cosine
+ --adam-beta1: 0.9
+ --adam-beta2: 0.95
+ # Add validation args
+ --eval-iters: 32
+ --eval-interval: 200
+ # Add initialization args
+ --init-method-std: 0.02
+ # Training args
+ --global-batch-size: 32
+ --train-iters: 50
+ --exit-duration-in-mins: 230
+ --no-check-for-nan-in-loss-and-grad: true
+
+METRICS:
+ - "lm loss"
+ - "num-zeros"
+ - "mem-allocated-bytes"
+ - "mem-max-allocated-bytes"
+ - "mtp_1 loss"
+ - "seq_load_balancing_loss"
diff --git a/tests/functional_tests/test_cases/moe2.0/model_configs/qwen3_proxy.yaml b/tests/functional_tests/test_cases/moe2.0/model_configs/qwen3_proxy.yaml
new file mode 100644
index 00000000000..46e298ec971
--- /dev/null
+++ b/tests/functional_tests/test_cases/moe2.0/model_configs/qwen3_proxy.yaml
@@ -0,0 +1,74 @@
+MODEL_ARGS:
+ # Data args
+ --seq-length: 4096
+ --data-cache-path: ${DATA_CACHE_PATH}
+ --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
+ --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
+ --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
+ --split: 949,50,1
+ # Add transformer base args
+ --num-layers: 16
+ --hidden-size: 1024
+ --normalization: RMSNorm
+ --norm-epsilon: 1e-6
+ --disable-bias-linear: true
+ --max-position-embeddings: 4096
+ --make-vocab-size-divisible-by: 3232
+ --untie-embeddings-and-output-weights: true
+ # Add attention related args
+ --group-query-attention: true
+ --num-query-groups: 4
+ --kv-channels: 128
+ --qk-layernorm: true
+ --position-embedding-type: rope
+ --rotary-percent: 1.0
+ --rotary-base: 1000000
+ # Add MLP related args
+ --swiglu: true
+ --ffn-hidden-size: 4096
+ # Add MoE args
+ --num-experts: 32
+ --moe-layer-freq: ([0]*1+[1]*15)
+ --moe-ffn-hidden-size: 1024
+ --moe-shared-expert-intermediate-size: 1024
+ --moe-router-load-balancing-type: aux_loss
+ --moe-router-topk: 4
+ --moe-router-pre-softmax: true
+ --moe-grouped-gemm: true
+ --moe-aux-loss-coeff: 1e-4
+ --moe-router-group-topk: 2
+ --moe-router-num-groups: 4
+ --moe-router-topk-scaling-factor: 2.0
+ --moe-router-score-function: sigmoid
+ --moe-router-enable-expert-bias: true
+ --moe-router-bias-update-rate: 1e-3
+ --moe-router-dtype: fp32
+ # Add regularization args
+ --attention-dropout: 0.0
+ --hidden-dropout: 0.0
+ --clip-grad: 1.0
+ --weight-decay: 0.1
+ # Add learning rate args
+ --lr-warmup-fraction: .01
+ --lr: 0.00015
+ --min-lr: 1.0e-5
+ --lr-decay-style: cosine
+ --adam-beta1: 0.9
+ --adam-beta2: 0.95
+ # Add validation args
+ --eval-iters: 32
+ --eval-interval: 200
+ # Add initialization args
+ --init-method-std: 0.02
+ # Training args
+ --global-batch-size: 32
+ --train-iters: 50
+ --exit-duration-in-mins: 230
+ --no-check-for-nan-in-loss-and-grad: true
+
+METRICS:
+ - "lm loss"
+ - "num-zeros"
+ - "mem-allocated-bytes"
+ - "mem-max-allocated-bytes"
+ - "load_balancing_loss"
diff --git a/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp1pp1ep8.yaml b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp1pp1ep8.yaml
new file mode 100644
index 00000000000..305e2847305
--- /dev/null
+++ b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp1pp1ep8.yaml
@@ -0,0 +1,41 @@
+ENV_VARS:
+ CUDA_DEVICE_MAX_CONNECTIONS: 1
+ NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
+ PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True
+ NCCL_NVLS_ENABLE: 0
+ PYTHONWARNINGS: ignore
+ NCCL_DEBUG: VERSION
+
+MODEL_ARGS:
+ # Transformer Engine args
+ --transformer-impl: transformer_engine
+ # Distributed args
+ --distributed-timeout-minutes: 60
+ --tensor-model-parallel-size: 1
+ --pipeline-model-parallel-size: 1
+ --expert-model-parallel-size: 8
+ --context-parallel-size: 1
+ --expert-tensor-parallel-size: 1
+ --use-distributed-optimizer: true
+ --overlap-grad-reduce: true
+ --overlap-param-gather: true
+ # Use unfused attention since MLA with fused attention and deterministic mode leads to NaN
+ --attention-backend: unfused # TODO: switch back to fused attention after fix
+ --use-mcore-models: true
+ --sequence-parallel: true
+ --micro-batch-size: 4
+ # MoE training related args
+ --moe-token-dispatcher-type: alltoall
+ --moe-permute-fusion: true
+ --save-interval: 25
+ # Add mixed precision args
+ --bf16: true
+ --exit-interval: 50
+ # kernel fusion related args
+ --no-rope-fusion: true
+ --cross-entropy-loss-fusion: true
+ --cross-entropy-fusion-impl: native
+ # MISC
+ --manual-gc: true
+ --manual-gc-interval: 100
+TEST_TYPE: resume-ckpt
diff --git a/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4.yaml b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4.yaml
new file mode 100644
index 00000000000..b93862aff8c
--- /dev/null
+++ b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4.yaml
@@ -0,0 +1,55 @@
+ENV_VARS:
+ CUDA_DEVICE_MAX_CONNECTIONS: 1
+ NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
+ PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True
+ NCCL_NVLS_ENABLE: 0
+ PYTHONWARNINGS: ignore
+ NCCL_DEBUG: VERSION
+
+MODEL_ARGS:
+ # Transformer Engine args
+ --transformer-impl: transformer_engine
+ # Distributed args
+ --distributed-timeout-minutes: 60
+ --tensor-model-parallel-size: 2
+ --pipeline-model-parallel-size: 2
+ --num-virtual-stages-per-pipeline-rank: 4
+ --expert-model-parallel-size: 4
+ --context-parallel-size: 1
+ --expert-tensor-parallel-size: 1
+ --use-distributed-optimizer: true
+ --overlap-grad-reduce: true
+ --overlap-param-gather: true
+ # Use unfused attention since MLA with fused attention and deterministic mode leads to NaN
+ --attention-backend: unfused # TODO: switch back to fused attention after fix
+ --use-mcore-models: true
+ --sequence-parallel: true
+ --micro-batch-size: 4
+ # MoE training related args
+ --moe-token-dispatcher-type: alltoall
+ --moe-permute-fusion: true
+ # Add checkpointing args
+ --save: ${CHECKPOINT_SAVE_PATH}
+ --load: ${CHECKPOINT_LOAD_PATH}
+ --save-interval: 25
+ # Add logging args
+ --log-timers-to-tensorboard: true
+ --log-memory-to-tensorboard: true
+ --log-num-zeros-in-grad: true
+ --log-params-norm: true
+ --log-validation-ppl-to-tensorboard: true
+ --log-throughput: true
+ --log-interval: 1
+ --logging-level: 40
+ --tensorboard-dir: ${TENSORBOARD_PATH}
+ # Add mixed precision args
+ --bf16: true
+ --exit-interval: 50
+ # kernel fusion related args
+ --no-rope-fusion: true
+ --cross-entropy-loss-fusion: true
+ --cross-entropy-fusion-impl: native
+ # MISC
+ --manual-gc: true
+ --manual-gc-interval: 100
+TEST_TYPE: resume-ckpt
\ No newline at end of file
diff --git a/tests/test_utils/python_scripts/merge_config.py b/tests/test_utils/python_scripts/merge_config.py
new file mode 100644
index 00000000000..176706038b7
--- /dev/null
+++ b/tests/test_utils/python_scripts/merge_config.py
@@ -0,0 +1,92 @@
+"""
+Merges base_config, runtime_config and model_config into one final config that the CI can launch.
+
+Starting Dec 19th 2025 MCore CI supports a new format of defining tests. We are decoupling the test
+config into a modular system of base_config, model_config and runtime_config. This allows us to
+re-use and parametrize a given model easily with multiple runtime configs, like parallelism settings.
+
+With this DRY principle, we simplify test maintenance and reduce the amount of code duplication.
+
+This refactoring is fully compliant with the original CI system as we merge the three configs into one
+final config that the CI can launch.
+
+Precedence: Runtime config > Model config > Base config.
+
+Usage:
+
+python merge_config.py \
+ --model_config model_config.yaml \
+ --base_config base_config.yaml \
+ --runtime_config runtime_config.yaml \
+ --output_config output_config.yaml
+"""
+
+import logging
+
+import click
+import yaml
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+@click.command()
+@click.option("--model_config", type=str, help="Model config to merge")
+@click.option("--base_config", type=str, help="Base config to merge")
+@click.option("--runtime_config", type=str, help="Run time config to merge")
+@click.option("--output_config", type=str, help="Output config to merge")
+def main(model_config, base_config, runtime_config, output_config):
+
+ with open(model_config, "r") as f:
+ model_config = yaml.safe_load(f)
+ with open(base_config, "r") as f:
+ base_config = yaml.safe_load(f)
+ with open(runtime_config, "r") as f:
+ runtime_config = yaml.safe_load(f)
+
+ config = {}
+
+ # Collect all top-level keys (ENV_VARS, MODEL_ARGS, etc.)
+ all_keys = set(base_config.keys()) | set(model_config.keys()) | set(runtime_config.keys())
+
+ for key in all_keys:
+ base_val = base_config.get(key)
+ model_val = model_config.get(key)
+ runtime_val = runtime_config.get(key)
+
+ # Get first non-None value to check type
+ first_val = base_val or model_val or runtime_val
+
+ if isinstance(first_val, dict):
+ # Merge dicts
+ config[key] = {}
+ for val in [base_val, model_val, runtime_val]:
+ if val:
+ config[key].update(val)
+ elif isinstance(first_val, list):
+ # Concatenate lists (deduplicate while preserving order)
+ config[key] = []
+ seen = set()
+ for val in [base_val, model_val, runtime_val]:
+ if val:
+ for item in val:
+ if item not in seen:
+ config[key].append(item)
+ seen.add(item)
+ else:
+ # Scalar value (string, int, bool, etc.) - use last defined
+ if runtime_val is not None:
+ config[key] = runtime_val
+ elif model_val is not None:
+ config[key] = model_val
+ else:
+ config[key] = base_val
+
+ with open(output_config, "w") as f:
+ yaml.dump(config, f)
+
+ logger.info(f"Config merged and saved to {output_config}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tests/test_utils/python_scripts/recipe_parser.py b/tests/test_utils/python_scripts/recipe_parser.py
index 394bda30a01..a04340407e3 100644
--- a/tests/test_utils/python_scripts/recipe_parser.py
+++ b/tests/test_utils/python_scripts/recipe_parser.py
@@ -100,11 +100,16 @@ def load_and_flatten(config_path: str) -> List[dotdict]:
def filter_by_test_case(workload_manifests: List[dotdict], test_case: str) -> Optional[dotdict]:
"""Returns a workload with matching name. Raises an error if there no or more than a single workload."""
+ print(len(workload_manifests))
workload_manifests = list(
workload_manifest
for workload_manifest in workload_manifests
if workload_manifest["spec"]["test_case"] == test_case
)
+ print(len(workload_manifests))
+
+ for w in workload_manifests:
+ print(w["spec"]["test_case"])
if len(workload_manifests) > 1:
logger.info("Duplicate test_case found!")
diff --git a/tests/test_utils/recipes/h100/gpt.yaml b/tests/test_utils/recipes/h100/gpt.yaml
index 52e38760f84..45985f133dd 100644
--- a/tests/test_utils/recipes/h100/gpt.yaml
+++ b/tests/test_utils/recipes/h100/gpt.yaml
@@ -347,11 +347,21 @@ products:
- environment: [dev]
scope: [mr, mr-github, mr-github-slim]
platforms: [dgx_h100]
+ - test_case: [gpt3_mcore_te_tp2_pp2_mhc]
+ products:
+ - environment: [dev]
+ scope: [mr, mr-github]
+ platforms: [dgx_h100]
- test_case: [gpt3_mcore_te_tp2_pp2_mla]
products:
- environment: [dev]
scope: [mr, mr-github]
platforms: [dgx_h100]
+ - test_case: [gpt3_mcore_te_tp2_pp2_dsa]
+ products:
+ - environment: [dev]
+ scope: [mr, mr-github, mr-github-slim]
+ platforms: [dgx_h100]
- test_case: [gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective]
products:
- environment: [dev]
diff --git a/tests/test_utils/recipes/h100/mamba-static-inference.yaml b/tests/test_utils/recipes/h100/mamba-static-inference.yaml
index b36c4a8f765..ae9692b4edc 100644
--- a/tests/test_utils/recipes/h100/mamba-static-inference.yaml
+++ b/tests/test_utils/recipes/h100/mamba-static-inference.yaml
@@ -60,8 +60,8 @@ products:
- environment: [dev]
scope: [mr-broken, mr-github-broken]
platforms: [dgx_h100]
- - test_case: [hybrid_static_inference_tp1_pp1_2B_cudagraphs]
- products:
- - environment: [dev]
- scope: [mr]
- platforms: [dg x_h100]
+ # - test_case: [hybrid_static_inference_tp1_pp1_2B_cudagraphs]
+ # products:
+ # - environment: [dev]
+ # scope: [mr]
+ # platforms: [dgx_h100] # Broken after dev2main sync 01/27
diff --git a/tests/test_utils/recipes/h100/t5.yaml b/tests/test_utils/recipes/h100/t5.yaml
deleted file mode 100644
index 1761cd3f1e6..00000000000
--- a/tests/test_utils/recipes/h100/t5.yaml
+++ /dev/null
@@ -1,113 +0,0 @@
-type: basic
-format_version: 1
-maintainers: [mcore]
-loggers: [stdout]
-spec:
- name: '{test_case}_{environment}_{platforms}'
- model: t5
- build: mcore-pyt-{environment}
- nodes: 1
- gpus: 8
- platforms: dgx_a100
- script_setup: |
- unset https_proxy
- echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc
-
- # Checkout latest
- cd /opt
- rm -rf /opt/megatron-lm; mkdir megatron-lm; cd megatron-lm
- git init
- git remote add origin $MCORE_REPO
- git fetch origin '+refs/merge-requests/*:refs/remotes/merge-requests/*'
- git fetch origin $MCORE_MR_COMMIT
- git checkout $MCORE_MR_COMMIT
- git rev-parse HEAD
-
- # Checkout backwards-ref
- cd /opt
- rm -rf /opt/megatron-lm-legacy; mkdir megatron-lm-legacy; cd megatron-lm-legacy
- git init
- git remote add origin $MCORE_REPO
- git fetch origin $MCORE_BACKWARDS_COMMIT
- git checkout $MCORE_BACKWARDS_COMMIT
- git rev-parse HEAD
- rm -rf megatron; cp -a /opt/megatron-lm/megatron ./
- script: |-
- ls
- cd /opt/megatron-lm
-
- NAME=$(echo {test_case}_{environment} | sed 's/dgx_h100/dgx_a100/g')
-
- ARGUMENTS=(
- "DATA_PATH=/mnt/artifacts"
- "DATA_CACHE_PATH=/workspace/data/cache"
- "OUTPUT_PATH={assets_dir}"
- "TENSORBOARD_PATH={assets_dir}/tensorboard"
- "CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints"
- "CHECKPOINT_LOAD_PATH=/mnt/artifacts"
- "TRAINING_SCRIPT_PATH=pretrain_t5.py"
- "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml"
- "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json"
- "N_REPEAT={n_repeat}"
- "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}"
- "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}"
- )
-
- bash ./tests/functional_tests/shell_test_utils/run_ci_test.sh ${{ARGUMENTS[@]}}
-
-products:
- - test_case: [t5_11b_mcore_tp4_pp1]
- products:
- - environment: [dev]
- scope: [nightly]
- platforms: [dgx_h100]
- - test_case: [t5_mcore_te_tp4_pp1]
- products:
- - environment: [dev]
- scope: [nightly]
- platforms: [dgx_h100]
- - test_case: [t5_mcore_te_tp4_pp1_resume_torch_dist]
- products:
- - environment: [dev]
- scope: [nightly]
- platforms: [dgx_h100]
- - test_case: [t5_mcore_tp4_pp1]
- products:
- - environment: [dev]
- scope: [nightly]
- platforms: [dgx_h100]
- - test_case: [t5_mcore_tp4_pp1_resume_torch_dist]
- products:
- - environment: [dev]
- scope: [nightly]
- platforms: [dgx_h100]
- - test_case: [t5_mcore_te_tp1_pp1_vp1_resume_torch]
- products:
- - environment: [dev]
- scope: [nightly]
- platforms: [dgx_a100, dgx_h100]
- - test_case: [t5_mcore_te_tp2_pp1_vp1]
- products:
- - environment: [dev]
- scope: [nightly]
- platforms: [dgx_a100, dgx_h100]
- - test_case: [t5_mcore_te_tp2_pp1_vp1_sequence_parallel]
- products:
- - environment: [dev]
- scope: [nightly]
- platforms: [dgx_a100, dgx_h100]
- - test_case: [t5_mcore_tp1_pp1_vp1]
- products:
- - environment: [dev]
- scope: [nightly]
- platforms: [dgx_a100, dgx_h100]
- - test_case: [t5_mcore_tp1_pp1_vp1_resume_torch]
- products:
- - environment: [dev]
- scope: [nightly]
- platforms: [dgx_a100, dgx_h100]
- - test_case: [t5_mcore_tp2_pp1_vp1]
- products:
- - environment: [dev]
- scope: [nightly]
- platforms: [dgx_a100, dgx_h100]
diff --git a/tests/test_utils/recipes/h100/bert.yaml b/tests/test_utils/recipes/moe2.0.yaml
similarity index 59%
rename from tests/test_utils/recipes/h100/bert.yaml
rename to tests/test_utils/recipes/moe2.0.yaml
index 89499f93c5e..39fccd08c40 100644
--- a/tests/test_utils/recipes/h100/bert.yaml
+++ b/tests/test_utils/recipes/moe2.0.yaml
@@ -3,14 +3,13 @@ format_version: 1
maintainers: [mcore]
loggers: [stdout]
spec:
- name: '{test_case}_{environment}_{platforms}'
- model: bert
- nodes: 1
+ name: "{test_case}_{environment}_{platforms}"
+ model: moe2.0
build: mcore-pyt-{environment}
+ nodes: 1
gpus: 8
+ n_repeat: 5
platforms: dgx_a100
- time_limit:
- n_repeat:
script_setup: |
unset https_proxy
echo "machine gitlab-master.nvidia.com login okoenig password $RO_API_TOKEN" | tee -a /root/.netrc
@@ -37,15 +36,24 @@ spec:
script: |-
ls
cd /opt/megatron-lm
+
NAME=$(echo {test_case}_{environment} | sed 's/dgx_h100/dgx_a100/g')
+
+ mkdir -p $(dirname ./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml)
+ python ./tests/test_utils/python_scripts/merge_config.py \
+ --base_config ./tests/functional_tests/test_cases/ci_base_config.yml \
+ --model_config ./tests/functional_tests/test_cases/{model}/model_configs/{model_config}.yaml \
+ --runtime_config ./tests/functional_tests/test_cases/{model}/runtime_configs/{runtime_config}.yaml \
+ --output_config ./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml
+
ARGUMENTS=(
"DATA_PATH=/mnt/artifacts"
- "DATA_CACHE_PATH=/workspace/data/cache"
+ "DATA_CACHE_PATH=/workspace/data/cache"
"OUTPUT_PATH={assets_dir}"
"TENSORBOARD_PATH={assets_dir}/tensorboard"
"CHECKPOINT_SAVE_PATH={artifacts_dir}/checkpoints"
"CHECKPOINT_LOAD_PATH=/mnt/artifacts"
- "TRAINING_SCRIPT_PATH=pretrain_bert.py"
+ "TRAINING_SCRIPT_PATH=pretrain_gpt.py"
"TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml"
"GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json"
"N_REPEAT={n_repeat}"
@@ -56,38 +64,49 @@ spec:
bash ./tests/functional_tests/shell_test_utils/run_ci_test.sh ${{ARGUMENTS[@]}}
products:
- - test_case: [bert_mcore_tp2_pp2]
+ ###########################
+ # Merge train tests #
+ ###########################
+ - test_case: [dsv3_tp1pp1ep8]
products:
- - environment: [dev]
- scope: [nightly]
+ - model_config: dsv3_proxy
+ runtime_config: tp1pp1ep8
+ environment: [dev]
+ scope: [nightly-broken]
platforms: [dgx_h100]
- - test_case: [bert_mcore_tp2_pp2_local_spec]
+ - test_case: [dsv3_tp2pp2ep4]
products:
- - environment: [dev]
- scope: [nightly]
+ - model_config: dsv3_proxy
+ runtime_config: tp2pp2ep4
+ environment: [dev]
+ scope: [nightly-broken]
platforms: [dgx_h100]
- - test_case: [bert_mcore_tp2_pp2_resume_torch_dist]
+ - test_case: [qwen3_tp1pp1ep1]
products:
- - environment: [dev]
- scope: [nightly]
+ - model_config: qwen3_proxy
+ runtime_config: tp1pp1ep1
+ environment: [dev]
+ scope: [nightly-broken]
platforms: [dgx_h100]
- - test_case: [bert_mcore_tp2_pp2_resume_torch_dist_local_spec]
+ - test_case: [qwen3_tp2pp2ep4]
products:
- - environment: [dev]
- scope: [nightly]
+ - model_config: qwen3_proxy
+ runtime_config: tp2pp2ep4
+ environment: [dev]
+ scope: [nightly-broken]
platforms: [dgx_h100]
- test_case: [bert_mcore_tp1_pp2]
products:
- environment: [dev]
- scope: [nightly]
+ scope: [nightly-broken]
platforms: [dgx_h100]
- test_case: [bert_mcore_tp1_pp4_vp2]
products:
- environment: [dev]
- scope: [nightly]
+ scope: [nightly-broken]
platforms: [dgx_h100]
- test_case: [bert_mcore_tp4_pp1]
products:
- environment: [dev]
- scope: [nightly]
+ scope: [nightly-broken]
platforms: [dgx_h100]
diff --git a/tests/unit_tests/a2a_overlap/test_delay_wgrad_compute.py b/tests/unit_tests/a2a_overlap/test_delay_wgrad_compute.py
new file mode 100644
index 00000000000..cfde6cf51b1
--- /dev/null
+++ b/tests/unit_tests/a2a_overlap/test_delay_wgrad_compute.py
@@ -0,0 +1,230 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+import gc
+
+import pytest
+import torch
+
+from megatron.core.models.gpt.gpt_layer_specs import get_gpt_decoder_block_spec
+from megatron.core.models.gpt.gpt_model import GPTModel
+from megatron.core.transformer import TransformerLayer
+from megatron.core.transformer.module import float16_to_fp32
+from megatron.core.utils import is_te_min_version
+from tests.unit_tests.a2a_overlap.utils import (
+ deterministic_mode,
+ get_test_config,
+ get_valid_flex_dispatcher_backend,
+ get_valid_fp8_flags,
+ get_valid_token_dispatcher_types,
+ reset_model,
+)
+from tests.unit_tests.test_utilities import Utils
+
+NUM_STEPS = 3
+SEQ_LEN = 128
+VOCAB_SIZE = 512
+LR = 0.01
+
+
+def _build_gpt_model(config):
+ """Build and return a GPTModel on CUDA from the given config."""
+ layer_spec = get_gpt_decoder_block_spec(config=config, use_transformer_engine=True)
+ model = GPTModel(
+ config=config,
+ transformer_layer_spec=layer_spec,
+ vocab_size=VOCAB_SIZE,
+ pre_process=True,
+ post_process=True,
+ max_sequence_length=300,
+ )
+ model.cuda()
+ return model
+
+
+def _build_input_data():
+ """Build fixed input data for the model."""
+ return {
+ "input_ids": torch.randint(0, VOCAB_SIZE, (1, SEQ_LEN), dtype=torch.int64).cuda(),
+ "labels": torch.randint(0, VOCAB_SIZE, (1, SEQ_LEN), dtype=torch.int64).cuda(),
+ "position_ids": torch.arange(SEQ_LEN, dtype=torch.int64).unsqueeze(0).cuda(),
+ "attention_mask": torch.ones((1, 1, SEQ_LEN, SEQ_LEN), dtype=bool).cuda(),
+ }
+
+
+def _train_step(model, optimizer, data):
+ """Run one forward-backward-optimizer step. Return the detached loss."""
+ optimizer.zero_grad()
+ loss = model.forward(**data)
+ loss = float16_to_fp32(loss)
+ loss.backward(torch.ones_like(loss))
+ optimizer.step()
+ return loss.detach().clone()
+
+
+def _assert_models_equal(ref_model, test_model):
+ """Assert that all parameters of two models are bit-identical."""
+ rank = torch.distributed.get_rank()
+ for (name_r, param_r), (_, param_t) in zip(
+ ref_model.named_parameters(), test_model.named_parameters()
+ ):
+ assert torch.equal(
+ param_r.data, param_t.data
+ ), f"[rank {rank}] Parameter mismatch after training: {name_r}"
+
+
+class TestDelayWgradCompute:
+ """Verify that overlap_dispatch_backward_with_experts_wgrad produces identical
+ training behaviour (per-step loss and final weights) as the non-delayed baseline
+ across multiple forward-backward-optimizer steps on the full GPTModel.
+ """
+
+ def setup_method(self, method):
+ Utils.initialize_model_parallel(
+ tensor_model_parallel_size=1,
+ pipeline_model_parallel_size=1,
+ expert_model_parallel_size=4,
+ )
+
+ def teardown_method(self, method):
+ Utils.destroy_model_parallel()
+
+ @pytest.mark.skipif(not is_te_min_version("2.3.0"), reason="Requires TE >= 2.3.0")
+ @pytest.mark.parametrize("shared_expert_intermediate_size", [None, 512])
+ @pytest.mark.parametrize("dispatcher_type", get_valid_token_dispatcher_types())
+ @pytest.mark.parametrize("fp8_flag", get_valid_fp8_flags())
+ def test_overlap_dispatch_backward_with_experts_wgrad(
+ self, shared_expert_intermediate_size, dispatcher_type, fp8_flag
+ ):
+ """Verify that overlap_dispatch_backward_with_experts_wgrad produces identical
+ per-step loss and final weights as the non-delayed baseline across multiple
+ forward-backward-optimizer steps on the full GPTModel.
+
+ Covers single/multi-layer, with/without shared experts, dispatcher types,
+ and FP8 modes.
+ """
+ num_layers = 4
+ extra_kwargs = {"moe_token_dispatcher_type": dispatcher_type}
+ if dispatcher_type == "flex":
+ extra_kwargs["moe_flex_dispatcher_backend"] = get_valid_flex_dispatcher_backend()
+ if fp8_flag is not None:
+ extra_kwargs["fp8"] = fp8_flag[0]
+ extra_kwargs["fp8_recipe"] = fp8_flag[1]
+ if shared_expert_intermediate_size is not None:
+ extra_kwargs["moe_shared_expert_intermediate_size"] = shared_expert_intermediate_size
+
+ with deterministic_mode():
+ ref_config = get_test_config(num_layers=num_layers, extra_kwargs=extra_kwargs)
+ ref_model = _build_gpt_model(ref_config)
+ init_params = reset_model(ref_model)
+
+ delay_kwargs = {**extra_kwargs, "overlap_dispatch_backward_with_experts_wgrad": True}
+ test_config = get_test_config(num_layers=num_layers, extra_kwargs=delay_kwargs)
+ test_model = _build_gpt_model(test_config)
+ reset_model(test_model, init_params)
+
+ data = _build_input_data()
+ ref_opt = torch.optim.SGD(ref_model.parameters(), lr=LR)
+ test_opt = torch.optim.SGD(test_model.parameters(), lr=LR)
+
+ rank = torch.distributed.get_rank()
+ for step in range(NUM_STEPS):
+ ref_loss = _train_step(ref_model, ref_opt, data)
+ test_loss = _train_step(test_model, test_opt, data)
+ assert torch.equal(ref_loss, test_loss), (
+ f"[rank {rank}] Loss mismatch at step {step}: "
+ f"ref={ref_loss.item()}, test={test_loss.item()}"
+ )
+
+ _assert_models_equal(ref_model, test_model)
+
+ del ref_model, test_model
+ gc.collect()
+ torch.cuda.empty_cache()
+
+ @pytest.mark.skipif(not is_te_min_version("2.3.0"), reason="Requires TE >= 2.3.0")
+ @pytest.mark.parametrize("shared_expert_intermediate_size", [None, 512])
+ @pytest.mark.parametrize("dispatcher_type", get_valid_token_dispatcher_types())
+ def test_overlap_dispatch_backward_with_experts_wgrad_with_fsdp(
+ self, shared_expert_intermediate_size, dispatcher_type
+ ):
+ """Verify delayed wgrad with MegatronFSDP wrapping.
+
+ The delayed wgrad path defers the FSDP reduce-scatter for expert
+ parameters until the wgrad computation completes on a separate stream.
+ This test checks that the deferred reduce-scatter produces identical
+ per-step loss and final weights as the non-delayed FSDP baseline.
+ """
+ from torch.distributed import DeviceMesh
+
+ from megatron.core import parallel_state
+ from megatron.core.distributed.fsdp.src.megatron_fsdp.fully_shard import (
+ fully_shard_model,
+ fully_shard_optimizer,
+ )
+
+ # Build expert device mesh required by MegatronFSDP for expert parallelism.
+ # Non-expert DeviceMesh will be auto-generated by fully_shard_model() with
+ # the same mesh dimension names (but different mesh shape, DP=WORLD_SIZE).
+ expt_dp_group = parallel_state.get_expert_data_parallel_group()
+ expt_dp_ranks = torch.distributed.get_process_group_ranks(expt_dp_group)
+ expt_tp_group = torch.distributed.new_group(
+ ranks=[torch.distributed.get_rank()]
+ ) # Dummy TP=1 group.
+ expt_device_mesh = DeviceMesh.from_group(
+ [expt_dp_group, expt_tp_group],
+ device_type="cuda",
+ mesh=[[x] for x in expt_dp_ranks],
+ # These are the default Megatron-FSDP DeviceMesh dimension names.
+ # Make sure they match the device_mesh=None case.
+ mesh_dim_names=("fsdp", "tp"),
+ )
+
+ num_layers = 4
+ extra_kwargs = {"moe_token_dispatcher_type": dispatcher_type}
+ if dispatcher_type == "flex":
+ extra_kwargs["moe_flex_dispatcher_backend"] = get_valid_flex_dispatcher_backend()
+ if shared_expert_intermediate_size is not None:
+ extra_kwargs["moe_shared_expert_intermediate_size"] = shared_expert_intermediate_size
+
+ with deterministic_mode():
+ # Build reference model (no delay) and wrap with FSDP
+ ref_config = get_test_config(num_layers=num_layers, extra_kwargs=extra_kwargs)
+ ref_model = _build_gpt_model(ref_config)
+ init_params = reset_model(ref_model)
+
+ ref_fsdp = fully_shard_model(
+ module=ref_model,
+ fsdp_unit_modules=[TransformerLayer],
+ expt_device_mesh=expt_device_mesh,
+ )
+ ref_opt = torch.optim.SGD(ref_fsdp.parameters(), lr=LR)
+ ref_opt = fully_shard_optimizer(optimizer=ref_opt)
+
+ # Build test model (with delay) and wrap with FSDP
+ delay_kwargs = {**extra_kwargs, "overlap_dispatch_backward_with_experts_wgrad": True}
+ test_config = get_test_config(num_layers=num_layers, extra_kwargs=delay_kwargs)
+ test_model = _build_gpt_model(test_config)
+ reset_model(test_model, init_params)
+
+ test_fsdp = fully_shard_model(
+ module=test_model,
+ fsdp_unit_modules=[TransformerLayer],
+ expt_device_mesh=expt_device_mesh,
+ )
+ test_opt = torch.optim.SGD(test_fsdp.parameters(), lr=LR)
+ test_opt = fully_shard_optimizer(optimizer=test_opt)
+
+ data = _build_input_data()
+ rank = torch.distributed.get_rank()
+ for step in range(NUM_STEPS):
+ ref_loss = _train_step(ref_fsdp, ref_opt, data)
+ test_loss = _train_step(test_fsdp, test_opt, data)
+ assert torch.equal(ref_loss, test_loss), (
+ f"[rank {rank}] Loss mismatch at step {step}: "
+ f"ref={ref_loss.item()}, test={test_loss.item()}"
+ )
+
+ _assert_models_equal(ref_fsdp, test_fsdp)
+
+ del ref_fsdp, test_fsdp, ref_opt, test_opt
+ gc.collect()
+ torch.cuda.empty_cache()
diff --git a/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py b/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py
index 6c59dd3f9e3..b933015406f 100644
--- a/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py
+++ b/tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py
@@ -103,7 +103,6 @@ def test_1f1b_schedule_model_chunk(self, mtp_layers, dispatcher_type, fp8_flag,
extra_kwargs = {"moe_token_dispatcher_type": dispatcher_type}
if dispatcher_type == "flex":
extra_kwargs["moe_flex_dispatcher_backend"] = "deepep"
- extra_kwargs["moe_router_dtype"] = "fp32"
if fp8_flag is not None:
extra_kwargs["fp8"] = fp8_flag[0]
extra_kwargs["fp8_recipe"] = fp8_flag[1]
@@ -215,7 +214,6 @@ def test_1f1b_schedule_model_chunk_with_padding_mask(self, dispatcher_type, laye
}
if dispatcher_type == "flex":
extra_kwargs["moe_flex_dispatcher_backend"] = "deepep"
- extra_kwargs["moe_router_dtype"] = "fp32"
with deterministic_mode():
for layer_num in layers:
output_tensors = []
diff --git a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py
index 4bbab6ccb30..95e2e1950d9 100644
--- a/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py
+++ b/tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py
@@ -410,7 +410,6 @@ def test_transformer_layer_overlap(self, dispatcher_type, fp8_flag):
extra_kwargs = {"moe_token_dispatcher_type": dispatcher_type}
if dispatcher_type == "flex":
extra_kwargs["moe_flex_dispatcher_backend"] = "deepep"
- extra_kwargs["moe_router_dtype"] = "fp32"
if fp8_flag is not None:
extra_kwargs["fp8"] = fp8_flag[0]
extra_kwargs["fp8_recipe"] = fp8_flag[1]
@@ -460,7 +459,6 @@ def test_mtp_layer_overlap(self, dispatcher_type, fp8_flag):
}
if dispatcher_type == "flex":
extra_kwargs["moe_flex_dispatcher_backend"] = "deepep"
- extra_kwargs["moe_router_dtype"] = "fp32"
if fp8_flag is not None:
extra_kwargs["fp8_recipe"] = fp8_flag[1]
extra_kwargs["fp8"] = fp8_flag[0]
diff --git a/tests/unit_tests/a2a_overlap/utils.py b/tests/unit_tests/a2a_overlap/utils.py
index a52843956df..9a644ee8cc8 100644
--- a/tests/unit_tests/a2a_overlap/utils.py
+++ b/tests/unit_tests/a2a_overlap/utils.py
@@ -216,33 +216,54 @@ def get_test_config(num_layers=1, num_moe_experts=8, extra_kwargs={}, moe_groupe
multi_latent_attention=True,
num_moe_experts=num_moe_experts,
moe_grouped_gemm=moe_grouped_gemm,
+ moe_router_dtype="fp32",
**extra_kwargs,
)
return config
def get_valid_token_dispatcher_types():
- try:
- from deep_ep import Buffer
- from deep_ep.utils import EventHandle, EventOverlap
+ from megatron.core.transformer.moe.fused_a2a import HAVE_DEEP_EP, HAVE_HYBRIDEP
+ if HAVE_HYBRIDEP or HAVE_DEEP_EP:
return ["alltoall", "flex"]
- except ImportError:
+ else:
return ["alltoall"]
+def get_valid_flex_dispatcher_backend():
+ from megatron.core.transformer.moe.fused_a2a import HAVE_DEEP_EP, HAVE_HYBRIDEP
+
+ if HAVE_HYBRIDEP:
+ return "hybridep"
+ elif HAVE_DEEP_EP:
+ return "deepep"
+ else:
+ return None
+
+
def get_valid_fp8_flags():
from megatron.core.enums import Fp8Recipe
+ from megatron.training.utils import get_device_arch_version
fp8_types = ["e4m3", "hybrid"]
recipes = []
- valid_flags = []
+ arch = get_device_arch_version()
+
if is_te_min_version("2.3.0.dev0"):
- recipes.append(Fp8Recipe.blockwise)
- recipes.append(Fp8Recipe.tensorwise)
+ recipes.append(Fp8Recipe.tensorwise) # Hopper + Blackwell
+ if is_te_min_version("2.4.0.dev0") and arch == 9:
+ recipes.append(Fp8Recipe.blockwise) # Hopper only
+
+ if is_te_min_version("2.3.0.dev0") and arch >= 10:
+ recipes.append(Fp8Recipe.mxfp8) # Blackwell only
+
+ valid_flags = []
for fp8_type in fp8_types:
for recipe in recipes:
+ if fp8_type == "hybrid" and recipe == Fp8Recipe.mxfp8:
+ continue
valid_flags.append((fp8_type, recipe))
valid_flags.append(None)
diff --git a/tests/unit_tests/dist_checkpointing/utils.py b/tests/unit_tests/dist_checkpointing/utils.py
index ec95602b020..0aadaee3b29 100644
--- a/tests/unit_tests/dist_checkpointing/utils.py
+++ b/tests/unit_tests/dist_checkpointing/utils.py
@@ -15,7 +15,7 @@
get_gpt_layer_with_transformer_engine_spec,
)
from megatron.core.optimizer import OptimizerConfig, get_megatron_optimizer
-from megatron.core.optimizer.muon import get_megatron_muon_optimizer
+from megatron.core.optimizer.optimizer import ChainedOptimizer
from megatron.core.tensor_parallel import model_parallel_cuda_manual_seed
from megatron.core.transformer import TransformerConfig
from megatron.training.arguments import parse_args
@@ -178,11 +178,6 @@ def init_checkpointing_mock_args(args, ckpt_dir, fully_parallel=False):
def setup_model_and_optimizer(
seed, tp, pp, initialize_fn=initialize_gpt_model, bf16=True, dist_opt=True, optimizer='adam'
):
- if 'muon' in optimizer and dist_opt:
- raise ValueError(
- "Layer-wise distributed optimizer with Muon is not supported with distributed optimizer."
- )
-
mock_args = parse_args(ignore_unknown_args=True)
with mock.patch('megatron.training.training.get_args', new=lambda: mock_args):
init_basic_mock_args(mock_args, tp, pp, bf16=bf16)
@@ -197,37 +192,42 @@ def setup_model_and_optimizer(
)
)
+ optimizer_type = optimizer
+ use_layer_wise = False
+ if optimizer_type == 'dist_muon':
+ optimizer = 'muon'
+ use_layer_wise = True
+ if optimizer_type in ('muon', 'dist_muon') and dist_opt:
+ use_layer_wise = True
+ dist_opt = False
+
config = OptimizerConfig(
bf16=bf16,
params_dtype=torch.bfloat16 if bf16 else torch.float,
use_distributed_optimizer=dist_opt,
+ use_layer_wise_distributed_optimizer=use_layer_wise,
optimizer=optimizer,
)
- if 'muon' in optimizer:
- # Use layer-wise distributed optimizer with Muon
- optimizer_type = optimizer
- # default lr None feels wrong. only change muon lr to avoid breaking old tests
+ if optimizer_type in ('muon', 'dist_muon'):
config.lr = 0.0
- optimizer = get_megatron_muon_optimizer(
- config, model, layer_wise_distributed_optimizer='dist' in optimizer_type
- )
- else:
- optimizer_type = optimizer
- optimizer = get_megatron_optimizer(config, model)
+ optimizer = get_megatron_optimizer(config, model)
torch.manual_seed(seed + 1)
model_parallel_cuda_manual_seed(seed + 1)
- if not 'muon' in optimizer_type:
+ if isinstance(optimizer, ChainedOptimizer):
+ for opt in optimizer.chained_optimizers:
+ if not hasattr(opt, 'optimizer'):
+ opt.init_state_fn(opt)
+ else:
+ opt.init_state_fn(opt.optimizer)
+ else:
for group in optimizer.optimizer.param_groups:
for p in group['params']:
if len(optimizer.optimizer.state[p]) == 0:
optimizer.optimizer.state[p]['exp_avg'] = torch.rand_like(p.data)
optimizer.optimizer.state[p]['exp_avg_sq'] = torch.rand_like(p.data)
- else:
- for opt in optimizer.chained_optimizers:
- opt.init_state_fn(opt)
optimizer.reload_model_params()
CachedMetadataFileSystemReader.clear_metadata_cache()
@@ -272,10 +272,6 @@ def setup_moe_model_and_optimizer(
use_glu=False,
optimizer='adam',
):
- if 'muon' in optimizer and dist_opt:
- raise ValueError(
- "Layer-wise distributed optimizer with Muon is not supported with distributed optimizer."
- )
mock_args = parse_args(ignore_unknown_args=True)
with mock.patch('megatron.training.training.get_args', new=lambda: mock_args):
init_basic_mock_args(mock_args, tp, pp, bf16=bf16)
@@ -295,37 +291,43 @@ def setup_moe_model_and_optimizer(
)
)
+ optimizer_type = optimizer
+ use_layer_wise = False
+ if optimizer_type == 'dist_muon':
+ optimizer = 'muon'
+ use_layer_wise = True
+ if optimizer_type in ('muon', 'dist_muon') and dist_opt:
+ use_layer_wise = True
+ dist_opt = False
+
config = OptimizerConfig(
bf16=bf16,
params_dtype=torch.bfloat16 if bf16 else torch.float,
use_distributed_optimizer=dist_opt,
+ use_layer_wise_distributed_optimizer=use_layer_wise,
optimizer=optimizer,
)
- if 'muon' in optimizer:
- optimizer_type = optimizer
- # default lr None feels wrong. only change muon lr to avoid breaking old tests
+ if optimizer_type in ('muon', 'dist_muon'):
config.lr = 0.0
- optimizer = get_megatron_muon_optimizer(
- config, model, layer_wise_distributed_optimizer='dist' in optimizer_type
- )
- else:
- optimizer_type = optimizer
- optimizer = get_megatron_optimizer(config, model)
+ optimizer = get_megatron_optimizer(config, model)
torch.manual_seed(seed + 1)
model_parallel_cuda_manual_seed(seed + 1)
- if not 'muon' in optimizer_type:
+ if optimizer_type in ('muon', 'dist_muon'):
+ for opt in optimizer.chained_optimizers:
+ if not hasattr(opt, 'optimizer'):
+ opt.init_state_fn(opt)
+ else:
+ opt.init_state_fn(opt.optimizer)
+ else:
for opt in optimizer.chained_optimizers:
for group in opt.param_groups:
for p in group['params']:
if len(opt.state[p]) == 0:
opt.state[p]['exp_avg'] = torch.rand_like(p.data)
opt.state[p]['exp_avg_sq'] = torch.rand_like(p.data)
- else:
- for opt in optimizer.chained_optimizers:
- opt.init_state_fn(opt)
optimizer.reload_model_params()
CachedMetadataFileSystemReader.clear_metadata_cache()
diff --git a/tests/unit_tests/distributed/megatron_fsdp/test_mcore_fully_sharded_data_parallel.py b/tests/unit_tests/distributed/megatron_fsdp/test_mcore_fully_sharded_data_parallel.py
index 0271da1fed9..82d4155e758 100644
--- a/tests/unit_tests/distributed/megatron_fsdp/test_mcore_fully_sharded_data_parallel.py
+++ b/tests/unit_tests/distributed/megatron_fsdp/test_mcore_fully_sharded_data_parallel.py
@@ -321,6 +321,52 @@ def train_step(model, optimizer, inputs):
msg=f"Parameters for {name1} don't match",
)
+ def test_fsdp_expt_device_mesh(self):
+ """Test that expt_device_mesh is None for dense models and not None for MoE models."""
+ if not is_torch_min_version("2.4.0"):
+ pytest.skip("Megatron FSDP requires torch >= 2.4.0")
+
+ fsdp_config = DistributedDataParallelConfig(
+ data_parallel_sharding_strategy="optim_grads_params",
+ overlap_grad_reduce=True,
+ overlap_param_gather=True,
+ bucket_size=10000,
+ use_megatron_fsdp=True,
+ )
+ input_dim, output_dim = 13, 17
+
+ # Dense model: expt_device_mesh should not be built without MoE config
+ dense_config = TransformerConfig(
+ num_attention_heads=1, num_layers=1, context_parallel_size=1
+ )
+ dense_model = TestModel(input_dim=input_dim, output_dim=output_dim).cuda()
+ fsdp_dense = FullyShardedDataParallel(
+ config=dense_config,
+ ddp_config=fsdp_config,
+ module=dense_model,
+ fsdp_unit_modules=[torch.nn.Linear],
+ )
+ assert (
+ fsdp_dense.megatron_fsdp_dist_index.expt_device_mesh is None
+ ), "Dense model: expt_device_mesh should be None"
+ fsdp_dense.stop_communication()
+
+ # MoE model: expt_device_mesh should be built when num_moe_experts is set
+ moe_config = TransformerConfig(
+ num_attention_heads=1, num_layers=1, context_parallel_size=1, num_moe_experts=4
+ )
+ moe_model = TestModel(input_dim=input_dim, output_dim=output_dim).cuda()
+ fsdp_moe = FullyShardedDataParallel(
+ config=moe_config,
+ ddp_config=fsdp_config,
+ module=moe_model,
+ fsdp_unit_modules=[torch.nn.Linear],
+ )
+ assert (
+ fsdp_moe.megatron_fsdp_dist_index.expt_device_mesh is not None
+ ), "MoE model: expt_device_mesh should not be None"
+ fsdp_moe.stop_communication()
+
# Testing fsdp_double_buffer with and without nccl_ub
@pytest.mark.parametrize(
("dp_size", "nccl_ub", "fsdp_double_buffer", "fsdp_manual_registration"),
diff --git a/tests/unit_tests/fusions/test_fused_linear_cross_entropy.py b/tests/unit_tests/fusions/test_fused_linear_cross_entropy.py
new file mode 100644
index 00000000000..3ac8e7f6200
--- /dev/null
+++ b/tests/unit_tests/fusions/test_fused_linear_cross_entropy.py
@@ -0,0 +1,1509 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+
+import contextlib
+import os
+import typing
+from contextlib import ExitStack
+from dataclasses import dataclass
+
+import numpy as np
+import pytest
+import torch
+import torch.distributed as dist
+from torch.utils.data import DataLoader, Dataset
+from torch.utils.data.distributed import DistributedSampler
+
+import megatron.core.parallel_state as ps
+from megatron.core.fusions.fused_linear_cross_entropy import linear_cross_entropy
+from megatron.core.models.gpt.gpt_layer_specs import (
+ get_gpt_decoder_block_spec,
+ get_gpt_mtp_block_spec,
+)
+from megatron.core.models.gpt.gpt_model import GPTModel
+from megatron.training.utils import get_device_arch_version
+from tests.unit_tests.a2a_overlap.utils import (
+ deterministic_mode,
+ get_test_config,
+ get_valid_fp8_flags,
+ get_valid_token_dispatcher_types,
+)
+from tests.unit_tests.test_utilities import Utils
+
+
+# 1. Define a standardized context to hold your distributed info
+@dataclass
+class DistContext:
+ rank: int
+ world_size: int
+ group: dist.ProcessGroup
+ is_chief: bool
+
+
+# 2. Create a module-scoped fixture
+# This runs ONE time per file, no matter how many test classes you have.
+@pytest.fixture(scope="module")
+def distributed_context():
+ # --- PRE-CHECK ---
+ if "WORLD_SIZE" not in os.environ or int(os.environ["WORLD_SIZE"]) < 2:
+ pytest.skip("Requires torchrun with multiple GPUs (WORLD_SIZE >= 2)")
+
+ # --- SETUP ---
+ is_external_init = dist.is_initialized()
+
+ if not is_external_init:
+ # Initialize only if not already done (e.g., by another test runner)
+ dist.init_process_group(
+ backend="nccl",
+ init_method="env://",
+ world_size=int(os.environ["WORLD_SIZE"]),
+ rank=int(os.environ["RANK"]),
+ )
+
+ # Set device immediately to avoid cross-device pollution
+ local_rank = int(os.environ.get("LOCAL_RANK", os.environ["RANK"]))
+ device = torch.device(f"cuda:{local_rank}")
+ torch.cuda.set_device(device)
+
+ # Gather context data
+ rank = dist.get_rank()
+ world_size = dist.get_world_size()
+ group = dist.group.WORLD
+
+ print(f"[INFO]: Initialized Rank: {rank} / {world_size}")
+
+ context = DistContext(rank=rank, world_size=world_size, group=group, is_chief=(rank == 0))
+
+ # Yield control to the tests
+ yield context
+
+ # --- TEARDOWN ---
+ # Only destroy if we were the ones who initialized it
+ if not is_external_init:
+ dist.destroy_process_group()
+
+
+class MockDataset(Dataset):
+ """
+    Mock dataset for GPT model training tests
+ Generates synthetic tokenized sequences on-the-fly
+ """
+
+ def __init__(
+ self,
+ num_samples=10000,
+ micro_batch_size=4,
+ sequence_length=2048,
+ vocab_size=128256,
+ seed=42,
+ ):
+ """
+ Initialize mock dataset
+
+ Args:
+ num_samples: Total number of samples
+ sequence_length: Length of each sequence
+ vocab_size: Size of vocabulary
+ seed: Random seed for reproducibility
+ """
+ self.num_samples = num_samples
+ self.micro_batch_size = micro_batch_size
+ self.sequence_length = sequence_length
+ self.vocab_size = vocab_size
+ self.seed = seed
+
+ # Set numpy seed for deterministic generation
+ np.random.seed(seed)
+
+ def __len__(self):
+ return self.num_samples
+
+ def __getitem__(self, idx):
+ """
+ Generate a single training sample
+
+ Returns:
+            dict with 'input_ids', 'labels', and 'attention_mask' tensors
+ """
+ # Use idx as seed for reproducible but varied samples
+ rng = np.random.RandomState(self.seed + idx)
+
+ # Generate random token sequence
+ tokens = rng.randint(0, self.vocab_size, size=self.sequence_length, dtype=np.int64)
+
+        # Labels are independent random tokens (not shifted inputs).
+ labels = rng.randint(0, self.vocab_size, size=self.sequence_length, dtype=np.int64)
+
+ return {
+ 'input_ids': torch.from_numpy(tokens.copy()),
+ 'labels': torch.from_numpy(labels.copy()),
+ "attention_mask": torch.ones(
+ (1, self.sequence_length, self.sequence_length), dtype=bool
+ ),
+ }
+
+
+def build_model(config):
+ max_seq_len = 300
+
+ # build layer spec
+ transformer_layer_spec = get_gpt_decoder_block_spec(config=config, use_transformer_engine=True)
+ mtp_block_spec = get_gpt_mtp_block_spec(config, transformer_layer_spec.layer_specs[-1], True)
+
+ # build model
+ gpt_model = GPTModel(
+ config=config,
+ transformer_layer_spec=transformer_layer_spec,
+ mtp_block_spec=mtp_block_spec,
+ vocab_size=100,
+ pre_process=True,
+ post_process=True,
+ max_sequence_length=max_seq_len,
+ )
+ return gpt_model
+
+
+# Define a reusable context manager
+@contextlib.contextmanager
+def init_model_parallel(tp=1, pp=1, ep=1):
+ try:
+ Utils.initialize_model_parallel(
+ tensor_model_parallel_size=tp,
+ pipeline_model_parallel_size=pp,
+ expert_model_parallel_size=ep,
+ )
+ yield
+ finally:
+ Utils.destroy_model_parallel()
+
+
+def init_gpt_dataloader(
+ dp_group, micro_batch_size=1, vocab_size=50257, sequence_length=128, batch_size=8
+):
+ dataset = MockDataset(
+ num_samples=1000,
+ micro_batch_size=micro_batch_size,
+ sequence_length=sequence_length,
+ vocab_size=vocab_size,
+ seed=42,
+ )
+ sampler = DistributedSampler(dataset, num_replicas=dp_group.size(), rank=dp_group.rank())
+ dataloader = DataLoader(dataset, batch_size=batch_size, sampler=sampler)
+ return dataloader
+
+
+# NOTE: permanently disabled via the "or True" in the skipif condition below.
+@pytest.mark.skipif(
+ ("WORLD_SIZE" not in os.environ or int(os.environ["WORLD_SIZE"]) < 2) or True,
+ reason="Requires torchrun with multiple GPUs",
+)
+class TestFusedLinearCrossEntropyOnGptModel:
+ @pytest.mark.parametrize("fp8_flag", get_valid_fp8_flags())
+ @pytest.mark.parametrize("mtp_layers", [0, 1])
+ @pytest.mark.parametrize("dispatcher_type", get_valid_token_dispatcher_types())
+ @pytest.mark.parametrize("layer_num", [2])
+ def test_gpt_model(self, mtp_layers, dispatcher_type, fp8_flag, layer_num):
+ with ExitStack() as stack:
+ gpu_count = torch.cuda.device_count()
+ tp = min(2, gpu_count)
+ ep = gpu_count // tp
+ stack.enter_context(init_model_parallel(tp=tp, ep=ep))
+ stack.enter_context(deterministic_mode())
+
+ # create TransformerConfig
+ extra_kwargs = {
+ "moe_token_dispatcher_type": dispatcher_type,
+ "sequence_parallel": tp > 1,
+ "tensor_model_parallel_size": tp,
+ }
+ if dispatcher_type == "flex":
+ extra_kwargs["moe_enable_deepep"] = True
+ extra_kwargs["moe_router_dtype"] = "fp32"
+ if fp8_flag is not None:
+ extra_kwargs["fp8"] = fp8_flag[0]
+ extra_kwargs["fp8_recipe"] = fp8_flag[1]
+ if mtp_layers > 0:
+ extra_kwargs["mtp_num_layers"] = mtp_layers
+ extra_kwargs["mtp_loss_scaling_factor"] = 1.1
+
+ # build config
+ config = get_test_config(num_layers=layer_num, extra_kwargs=extra_kwargs)
+ config.expert_model_parallel_size = ep
+
+ # build model
+ gpt_model = build_model(config)
+ gpt_model.cuda()
+
+ dataloader = init_gpt_dataloader(
+ ps.get_data_parallel_group(),
+ vocab_size=gpt_model.vocab_size,
+ micro_batch_size=1,
+ sequence_length=gpt_model.max_sequence_length,
+ batch_size=4,
+ )
+            # Run forward/backward over batches from the mock dataloader.
+ for batch in dataloader:
+ batch["position_ids"] = torch.arange(
+ gpt_model.max_sequence_length, dtype=torch.int64
+ )
+ batch = {k: v.cuda() for k, v in batch.items()}
+ gpt_model.zero_grad()
+ output = gpt_model(**batch)
+ loss = output.sum()
+ loss.backward()
+
+
+@pytest.mark.skipif(
+ "WORLD_SIZE" in os.environ and os.environ["WORLD_SIZE"] != "1", reason="Requires single GPU"
+)
+@pytest.mark.skipif(get_device_arch_version() != 10, reason="Requires GPU architecture = 10")
+class TestFusedLinearCrossEntropyDataParallel:
+ def cleanup(self):
+ torch.cuda.empty_cache()
+ torch.cuda.reset_peak_memory_stats()
+ import gc
+
+ gc.collect()
+ torch.cuda.synchronize()
+
+ @staticmethod
+ def torch_linear_cross_entropy(
+ hidden: torch.Tensor,
+ weight: torch.Tensor,
+ labels: torch.Tensor,
+ reduction: str,
+ ignore_index: int,
+ ):
+        # NOTE: convert inputs to fp32 so the matmul accumulates in fp32,
+        # which ensures the accuracy of this reference implementation.
+ logits = hidden.to(torch.float32) @ weight.T.to(torch.float32)
+ logprobs = torch.nn.functional.cross_entropy(
+ logits.view(-1, logits.shape[-1]),
+ labels.view(-1),
+ reduction=reduction,
+ ignore_index=ignore_index,
+ )
+ return logprobs.to(torch.float32)
+
+ @staticmethod
+ def get_problems():
+ return [
+ (80, 125, 64),
+ (80, 152064, 64),
+ (1024, 152064, 4096),
+ (4096, 152063, 8192),
+ ((1, 4096), 152064, 8192),
+ ((2, 4096), 152064, 8192),
+ ]
+
+ @staticmethod
+ def get_ignore_index():
+ return [-100, 4]
+
+ def test_kernel_launch(self):
+ """
+ Check if the compiled kernel can be
+ launched with different problem sizes
+ """
+ self.cleanup()
+
+ num_tokens = [15, 26, 128, 513, 2048, 8192]
+ vocab_size = 152064
+ dim = 4096
+ dtype = torch.bfloat16
+ reduction = "mean"
+ ignore_index = -100
+
+ weight = torch.randn(vocab_size, dim, dtype=dtype, device="cuda").requires_grad_()
+ for num_token in num_tokens:
+ hidden = torch.randn(num_token, dim, dtype=dtype, device="cuda").requires_grad_()
+ labels = torch.randint(0, vocab_size, (num_token,), dtype=torch.long, device="cuda")
+
+ logprobs = linear_cross_entropy(
+ hidden, weight, labels, reduction=reduction, ignore_index=ignore_index
+ )
+ assert not torch.isnan(logprobs).any()
+
+ gLogprobs = torch.randn_like(logprobs)
+ (d_hidden, d_weight) = torch.autograd.grad(
+ (logprobs,), (hidden, weight), (gLogprobs,), retain_graph=False
+ )
+ assert not torch.isnan(d_hidden).any()
+ assert not torch.isnan(d_weight).any()
+
+ @pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16])
+ @pytest.mark.parametrize("problem", get_problems())
+ @pytest.mark.parametrize("reduction", ["none", "mean", "sum"])
+ @pytest.mark.parametrize("ignore_index", get_ignore_index())
+ def test_correctness(self, dtype, problem, reduction, ignore_index):
+ num_tokens, vocabsize, dim = problem
+ hidden_shape = (num_tokens, dim) if isinstance(num_tokens, int) else (*num_tokens, dim)
+ labels_shape = (num_tokens,) if isinstance(num_tokens, int) else num_tokens
+
+ hidden = (
+ torch.empty(hidden_shape, dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ weight = (
+ torch.empty((vocabsize, dim), dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda")
+ if ignore_index >= 0 and ignore_index < vocabsize:
+ pad_labels = torch.nn.functional.pad(labels, (0, 1), value=ignore_index)
+ labels = pad_labels[..., 1:].contiguous()
+
+ # forward
+ torch_logprobs = self.torch_linear_cross_entropy(
+ hidden, weight, labels, reduction=reduction, ignore_index=ignore_index
+ )
+
+ custom_logprobs = linear_cross_entropy(
+ hidden, weight, labels, reduction=reduction, ignore_index=ignore_index
+ )
+
+ torch.testing.assert_close(torch_logprobs, custom_logprobs)
+
+ # backward
+ g_logprobs = torch.empty_like(torch_logprobs).uniform_(-0.1, 0.1)
+
+ (d_torch_hidden, d_torch_weight) = torch.autograd.grad(
+ (torch_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False
+ )
+
+ (d_custom_hidden, d_custom_weight) = torch.autograd.grad(
+ (custom_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False
+ )
+
+ torch.testing.assert_close(d_torch_hidden, d_custom_hidden, atol=1e-3, rtol=1e-3)
+ torch.testing.assert_close(d_torch_weight, d_custom_weight, atol=1e-3, rtol=1e-3)
+
+ @pytest.mark.parametrize("problem", [((1, 4096), 129280, 7168)])
+ @pytest.mark.parametrize("dtype", [torch.bfloat16])
+ @pytest.mark.parametrize("reduction", ["mean"])
+ @pytest.mark.parametrize("ignore_index", [-100])
+ def test_performance(self, problem, dtype, reduction, ignore_index):
+ num_tokens, vocabsize, dim = problem
+ hidden_shape = (num_tokens, dim) if isinstance(num_tokens, int) else (*num_tokens, dim)
+ labels_shape = (num_tokens,) if isinstance(num_tokens, int) else num_tokens
+
+ start_event = torch.cuda.Event(enable_timing=True)
+ end_event = torch.cuda.Event(enable_timing=True)
+
+ torch_fwd_latency = list()
+ torch_bwd_latency = list()
+ custom_fwd_latency = list()
+ custom_bwd_latency = list()
+
+ iterations = 5
+ for i in range(iterations):
+ hidden = (
+ torch.empty(hidden_shape, dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ weight = (
+ torch.empty((vocabsize, dim), dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda")
+ if ignore_index >= 0 and ignore_index < vocabsize:
+ pad_labels = torch.nn.functional.pad(labels, (0, 1), value=ignore_index)
+ labels = pad_labels[..., 1:].contiguous()
+
+ # -------- forward -------- #
+ start_event.record()
+ torch_logprobs = self.torch_linear_cross_entropy(
+ hidden, weight, labels, reduction=reduction, ignore_index=ignore_index
+ )
+ end_event.record()
+ torch.cuda.synchronize()
+ torch_fwd_latency.append(start_event.elapsed_time(end_event))
+
+ start_event.record()
+ custom_logprobs = linear_cross_entropy(
+ hidden, weight, labels, reduction=reduction, ignore_index=ignore_index
+ )
+ end_event.record()
+ torch.cuda.synchronize()
+ custom_fwd_latency.append(start_event.elapsed_time(end_event))
+
+ # -------- backward -------- #
+ g_logprobs = torch.empty_like(torch_logprobs).uniform_(-0.1, 0.1)
+
+ start_event.record()
+ (d_torch_hidden, d_torch_weight) = torch.autograd.grad(
+ (torch_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False
+ )
+ end_event.record()
+ torch.cuda.synchronize()
+ torch_bwd_latency.append(start_event.elapsed_time(end_event))
+
+ start_event.record()
+ (d_custom_hidden, d_custom_weight) = torch.autograd.grad(
+ (custom_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False
+ )
+ end_event.record()
+ torch.cuda.synchronize()
+ custom_bwd_latency.append(start_event.elapsed_time(end_event))
+
+ # --- remove first latency due to warmup --- #
+ torch_fwd_latency = torch_fwd_latency[1:]
+ torch_bwd_latency = torch_bwd_latency[1:]
+ custom_fwd_latency = custom_fwd_latency[1:]
+ custom_bwd_latency = custom_bwd_latency[1:]
+
+ print()
+ print(f"[INFO]: On problem {problem}, dtype {dtype}, reduction {reduction}:")
+ print(
+ f"[INFO]: Torch forward latency: {sum(torch_fwd_latency) / len(torch_fwd_latency):.2f} ms"
+ )
+ print(
+ f"[INFO]: Custom forward latency: {sum(custom_fwd_latency) / len(custom_fwd_latency):.2f} ms"
+ )
+ print(
+ f"[INFO]: Torch backward latency: {sum(torch_bwd_latency) / len(torch_bwd_latency):.2f} ms"
+ )
+ print(
+ f"[INFO]: Custom backward latency: {sum(custom_bwd_latency) / len(custom_bwd_latency):.2f} ms"
+ )
+
+ @pytest.mark.parametrize("problem", [((1, 4096), 129280, 7168)])
+ @pytest.mark.parametrize("dtype", [torch.bfloat16])
+ @pytest.mark.parametrize("reduction", ["mean"])
+ @pytest.mark.parametrize("ignore_index", [-100])
+ def test_storage(self, problem, dtype, reduction, ignore_index):
+ num_tokens, vocabsize, dim = problem
+ hidden_shape = (num_tokens, dim) if isinstance(num_tokens, int) else (*num_tokens, dim)
+ labels_shape = (num_tokens,) if isinstance(num_tokens, int) else num_tokens
+ print()
+ print(f"[INFO]: On problem {problem}, dtype {dtype}, reduction {reduction}:")
+
+ def torch_storage():
+ hidden = (
+ torch.empty(hidden_shape, dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ weight = (
+ torch.empty((vocabsize, dim), dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda")
+ if ignore_index >= 0 and ignore_index < vocabsize:
+ pad_labels = torch.nn.functional.pad(labels, (0, 1), value=ignore_index)
+ labels = pad_labels[..., 1:].contiguous()
+
+ torch.cuda.reset_peak_memory_stats()
+ torch_logprobs = self.torch_linear_cross_entropy(
+ hidden, weight, labels, reduction=reduction, ignore_index=ignore_index
+ )
+ torch.cuda.synchronize()
+ torch_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024
+ print(f"[INFO]: Torch Forward pass peak memory: {torch_max_memory:.2f} MB")
+
+ torch.cuda.reset_peak_memory_stats()
+ g_logprobs = torch.empty_like(torch_logprobs).uniform_(-0.1, 0.1)
+ (d_torch_hidden, d_torch_weight) = torch.autograd.grad(
+ (torch_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False
+ )
+ torch.cuda.synchronize()
+ torch_backward_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024
+ print(f"[INFO]: Torch Backward pass peak memory: {torch_backward_max_memory:.2f} MB")
+
+ def custom_storage():
+ hidden = (
+ torch.empty(hidden_shape, dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ weight = (
+ torch.empty((vocabsize, dim), dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda")
+ if ignore_index >= 0 and ignore_index < vocabsize:
+ pad_labels = torch.nn.functional.pad(labels, (0, 1), value=ignore_index)
+ labels = pad_labels[..., 1:].contiguous()
+
+ torch.cuda.reset_peak_memory_stats()
+ custom_logprobs = linear_cross_entropy(
+ hidden, weight, labels, reduction=reduction, ignore_index=ignore_index
+ )
+ torch.cuda.synchronize()
+ custom_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024
+ print(f"[INFO]: Custom Forward pass peak memory: {custom_max_memory:.2f} MB")
+
+ torch.cuda.reset_peak_memory_stats()
+ g_logprobs = torch.empty_like(custom_logprobs).uniform_(-0.1, 0.1)
+ (d_custom_hidden, d_custom_weight) = torch.autograd.grad(
+ (custom_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False
+ )
+ torch.cuda.synchronize()
+ custom_backward_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024
+ print(f"[INFO]: Custom Backward pass peak memory: {custom_backward_max_memory:.2f} MB")
+
+ self.cleanup()
+ torch_storage()
+ self.cleanup()
+ custom_storage()
+
+
+@pytest.mark.skipif(
+ ("WORLD_SIZE" not in os.environ or int(os.environ["WORLD_SIZE"]) < 2), # or True,
+ reason="Requires torchrun with multiple GPUs",
+)
+@pytest.mark.skipif(get_device_arch_version() != 10, reason="Requires GPU architecture = 10")
+@pytest.mark.usefixtures("distributed_context")
+class TestFusedLinearCrossEntropyTensorParallel:
+ @pytest.fixture(autouse=True)
+ def setup_attrs(self, distributed_context):
+ """
+ Setup attributes for the test class.
+ """
+ self.tp_group = distributed_context.group
+ self.tp_rank = distributed_context.rank
+ self.tp_world_size = distributed_context.world_size
+ self.is_chief = distributed_context.is_chief
+
+ def cleanup(self):
+ torch.cuda.empty_cache()
+ torch.cuda.reset_peak_memory_stats()
+ import gc
+
+ gc.collect()
+ torch.cuda.synchronize()
+
+ @staticmethod
+ def torch_linear_cross_entropy_single_gpu(
+ hidden: torch.Tensor,
+ weight: torch.Tensor,
+ labels: torch.Tensor,
+ reduction: typing.Optional[str] = "mean",
+ ):
+ logits = hidden.to(torch.float32) @ weight.T.to(torch.float32)
+ logprobs = torch.nn.functional.cross_entropy(
+ logits.view(-1, logits.shape[-1]), labels.view(-1), reduction=reduction
+ )
+ return logprobs.to(torch.float32)
+
+ class TorchLinearCrossEntropy(torch.autograd.Function):
+ @staticmethod
+ def forward(
+ ctx,
+ hidden: torch.Tensor,
+ weight: torch.Tensor,
+ labels: torch.Tensor,
+ tp_group: torch.distributed.ProcessGroup,
+ reduction: typing.Optional[str] = "mean",
+ ):
+ tp_rank = 0 if tp_group is None else torch.distributed.get_rank(tp_group)
+ tp_world_size = 1 if tp_group is None else torch.distributed.get_world_size(tp_group)
+
+ logits = hidden.to(torch.float32) @ weight.T.to(torch.float32)
+
+ whole_logits = torch.empty(
+ (logits.shape[0], logits.shape[-1] * tp_world_size),
+ dtype=logits.dtype,
+ device=logits.device,
+ )
+ whole_logits_ref = [
+ whole_logits[..., i * logits.shape[-1] : (i + 1) * logits.shape[-1]]
+ for i in range(tp_world_size)
+ ]
+ dist.all_gather(whole_logits_ref, logits, group=tp_group)
+
+ logprobs = torch.nn.functional.cross_entropy(
+ whole_logits.view(-1, whole_logits.shape[-1]), labels.view(-1), reduction=reduction
+ )
+
+ # If we don't preserve whole_logits,
+ # we need to re-compute it in the backward pass
+ ctx.save_for_backward(hidden, weight, labels)
+ ctx.tp_group = tp_group
+ ctx.reduction = reduction
+ ctx.tp_rank = tp_rank
+ ctx.tp_world_size = tp_world_size
+
+ return logprobs.to(torch.float32)
+
+ @staticmethod
+ def backward(ctx, g_logprobs: torch.Tensor):
+ hidden, weight, labels = ctx.saved_tensors
+ tp_group = ctx.tp_group
+ reduction = ctx.reduction
+ tp_rank = ctx.tp_rank
+ tp_world_size = ctx.tp_world_size
+
+ num_tokens, dim = hidden.shape
+
+ if reduction == "mean":
+ _g_logprobs = torch.broadcast_to(g_logprobs / num_tokens, (num_tokens,))
+ elif reduction == "sum":
+ _g_logprobs = torch.broadcast_to(g_logprobs, (num_tokens,))
+ else:
+ _g_logprobs = g_logprobs
+
+ # re-compute whole_logits
+ logits = hidden.to(torch.float32) @ weight.T.to(torch.float32)
+ whole_logits = torch.empty(
+ (logits.shape[0], logits.shape[-1] * tp_world_size),
+ dtype=logits.dtype,
+ device=logits.device,
+ )
+ whole_logits_ref = [
+ whole_logits[..., i * logits.shape[-1] : (i + 1) * logits.shape[-1]]
+ for i in range(tp_world_size)
+ ]
+ dist.all_gather(whole_logits_ref, logits, group=tp_group)
+
+ one_hot = torch.zeros_like(whole_logits)
+ one_hot.scatter_(1, labels.view(-1).unsqueeze(-1), 1)
+
+ pd = torch.nn.functional.softmax(whole_logits, dim=-1)
+ d_logits = (pd - one_hot) * _g_logprobs.unsqueeze(-1)
+ d_logits = d_logits.to(hidden.dtype)
+
+ local_size = weight.size(0)
+ local_d_logits = d_logits[:, tp_rank * local_size : (tp_rank + 1) * local_size]
+
+ local_d_hidden = local_d_logits @ weight
+ local_d_weight = local_d_logits.T @ hidden
+
+ dist.all_reduce(local_d_hidden, op=dist.ReduceOp.SUM, group=tp_group)
+
+ return local_d_hidden, local_d_weight, None, None, None
+
+ @pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16])
+ @pytest.mark.parametrize("reduction", ["mean", "sum", "none"])
+ @pytest.mark.parametrize("problem", [(4096, 129280, 8192)])
+ def test_torch_tp_vs_single_gpu(self, dtype, reduction, problem):
+ num_tokens, vocabsize, dim = problem
+ vocabsize = vocabsize // self.tp_world_size
+
+ hidden = (
+ torch.empty((num_tokens, dim), dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ weight = (
+ torch.empty((vocabsize, dim), dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ labels = torch.randint(0, vocabsize, (num_tokens,), dtype=torch.long, device="cuda")
+
+ # ------------ forward pass ------------ #
+ dist.broadcast(hidden, src=0, group=self.tp_group)
+ dist.broadcast(labels, src=0, group=self.tp_group)
+
+ # single GPU
+ whole_weight = torch.empty(
+ (vocabsize * self.tp_world_size, dim), dtype=dtype, device="cuda"
+ )
+ whole_weight_view = [
+ whole_weight[i * vocabsize : (i + 1) * vocabsize, :] for i in range(self.tp_world_size)
+ ]
+ dist.all_gather(whole_weight_view, weight, group=self.tp_group)
+ whole_weight = whole_weight.clone().requires_grad_()
+ logprobs_single_gpu = self.torch_linear_cross_entropy_single_gpu(
+ hidden, whole_weight, labels, reduction=reduction
+ )
+
+ # TP
+ logprobs_tp = self.TorchLinearCrossEntropy.apply(
+ hidden, weight, labels, self.tp_group, reduction
+ )
+ torch.testing.assert_close(logprobs_single_gpu, logprobs_tp)
+
+ # ------------ backward pass ------------ #
+ g_logprobs = torch.empty_like(logprobs_single_gpu).uniform_(-0.1, 0.1)
+ dist.broadcast(g_logprobs, src=0, group=self.tp_group)
+
+ # single GPU
+ (d_hidden_single_gpu, d_weight_single_gpu) = torch.autograd.grad(
+ (logprobs_single_gpu,), (hidden, whole_weight), (g_logprobs,), retain_graph=False
+ )
+
+ # TP
+ (d_hidden_tp, d_weight_tp) = torch.autograd.grad(
+ (logprobs_tp,), (hidden, weight), (g_logprobs,), retain_graph=False
+ )
+ torch.testing.assert_close(d_hidden_single_gpu, d_hidden_tp, atol=1e-3, rtol=1e-3)
+ local_d_weight_single_gpu = d_weight_single_gpu[
+ self.tp_rank * weight.shape[0] : (self.tp_rank + 1) * weight.shape[0], :
+ ]
+ torch.testing.assert_close(local_d_weight_single_gpu, d_weight_tp, atol=1e-3, rtol=1e-3)
+
+ @staticmethod
+ def get_problems():
+ return [
+ (80, 125, 64),
+ (80, 152064, 64),
+ (1024, 152064, 4096),
+ (4096, 152063, 8192),
+ ((1, 4096), 152064, 8192),
+ ((2, 4096), 152064, 8192),
+ ]
+
+ @pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16])
+ @pytest.mark.parametrize("reduction", ["mean", "sum", "none"])
+ @pytest.mark.parametrize("problem", get_problems())
+ def test_correctness(self, dtype, reduction, problem):
+ num_tokens, vocabsize, dim = problem
+ hidden_shape = (num_tokens, dim) if isinstance(num_tokens, int) else (*num_tokens, dim)
+ labels_shape = (num_tokens,) if isinstance(num_tokens, int) else num_tokens
+
+ hidden = (
+ torch.empty(hidden_shape, dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ weight = (
+ torch.empty((vocabsize, dim), dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda")
+
+ # ------ forward pass ------ #
+ dist.broadcast(hidden, src=0, group=self.tp_group)
+ dist.broadcast(labels, src=0, group=self.tp_group)
+
+ torch_logprobs = self.TorchLinearCrossEntropy.apply(
+ hidden.view(-1, dim), weight, labels, self.tp_group, reduction
+ )
+
+ custom_logprobs = linear_cross_entropy(
+ hidden, weight, labels, tp_group=self.tp_group, reduction=reduction
+ )
+
+ torch.testing.assert_close(torch_logprobs, custom_logprobs)
+
+ # ------- backward pass ------- #
+ g_logprobs = torch.empty_like(torch_logprobs).uniform_(-0.1, 0.1)
+ dist.broadcast(g_logprobs, src=0, group=self.tp_group)
+
+ (d_hidden_torch, d_weight_torch) = torch.autograd.grad(
+ (torch_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False
+ )
+ (d_hidden_custom, d_weight_custom) = torch.autograd.grad(
+ (custom_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False
+ )
+ torch.testing.assert_close(d_hidden_torch, d_hidden_custom, atol=1e-3, rtol=1e-3)
+ torch.testing.assert_close(d_weight_torch, d_weight_custom, atol=1e-4, rtol=1e-4)
+
+ @pytest.mark.parametrize("problem", [((1, 4096), 129280, 7168)])
+ @pytest.mark.parametrize("dtype", [torch.bfloat16])
+ @pytest.mark.parametrize("reduction", ["mean"])
+ def test_performance(self, problem, dtype, reduction):
+ num_tokens, vocabsize, dim = problem
+ hidden_shape = (num_tokens, dim) if isinstance(num_tokens, int) else (*num_tokens, dim)
+ labels_shape = (num_tokens,) if isinstance(num_tokens, int) else num_tokens
+
+ start_event = torch.cuda.Event(enable_timing=True)
+ end_event = torch.cuda.Event(enable_timing=True)
+
+ torch_fwd_latency = list()
+ torch_bwd_latency = list()
+ custom_fwd_latency = list()
+ custom_bwd_latency = list()
+
+ iterations = 5
+ for i in range(iterations):
+ hidden = (
+ torch.empty(hidden_shape, dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ weight = (
+ torch.empty((vocabsize, dim), dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda")
+
+ # ------ forward pass ------ #
+ dist.broadcast(hidden, src=0, group=self.tp_group)
+ dist.broadcast(labels, src=0, group=self.tp_group)
+
+ start_event.record()
+ torch_logprobs = self.TorchLinearCrossEntropy.apply(
+ hidden.view(-1, dim), weight, labels, self.tp_group, reduction
+ )
+ end_event.record()
+ torch.cuda.synchronize()
+ torch_fwd_latency.append(start_event.elapsed_time(end_event))
+
+ start_event.record()
+ custom_logprobs = linear_cross_entropy(
+ hidden, weight, labels, tp_group=self.tp_group, reduction=reduction
+ )
+ end_event.record()
+ torch.cuda.synchronize()
+ custom_fwd_latency.append(start_event.elapsed_time(end_event))
+
+ # ------- backward pass ------- #
+ g_logprobs = torch.empty_like(torch_logprobs).uniform_(-0.1, 0.1)
+ dist.broadcast(g_logprobs, src=0, group=self.tp_group)
+
+ start_event.record()
+ (d_hidden_torch, d_weight_torch) = torch.autograd.grad(
+ (torch_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False
+ )
+ end_event.record()
+ torch.cuda.synchronize()
+ torch_bwd_latency.append(start_event.elapsed_time(end_event))
+
+ start_event.record()
+ (d_hidden_custom, d_weight_custom) = torch.autograd.grad(
+ (custom_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False
+ )
+ end_event.record()
+ torch.cuda.synchronize()
+ custom_bwd_latency.append(start_event.elapsed_time(end_event))
+
+ # --- remove first latency due to warmup --- #
+ torch_fwd_latency = torch_fwd_latency[1:]
+ torch_bwd_latency = torch_bwd_latency[1:]
+ custom_fwd_latency = custom_fwd_latency[1:]
+ custom_bwd_latency = custom_bwd_latency[1:]
+
+ if self.is_chief:
+ print()
+ print(
+ f"[INFO]: On problem {problem}, dtype {dtype}, reduction {reduction}, TP size {self.tp_world_size}:"
+ )
+ print(
+ f"[INFO]: Torch forward latency: {sum(torch_fwd_latency) / len(torch_fwd_latency):.2f} ms"
+ )
+ print(
+ f"[INFO]: Custom forward latency: {sum(custom_fwd_latency) / len(custom_fwd_latency):.2f} ms"
+ )
+ print(
+ f"[INFO]: Torch backward latency: {sum(torch_bwd_latency) / len(torch_bwd_latency):.2f} ms"
+ )
+ print(
+ f"[INFO]: Custom backward latency: {sum(custom_bwd_latency) / len(custom_bwd_latency):.2f} ms"
+ )
+
+ @pytest.mark.parametrize("problem", [((1, 4096), 129280, 7168)])
+ @pytest.mark.parametrize("dtype", [torch.bfloat16])
+ @pytest.mark.parametrize("reduction", ["mean"])
+ def test_storage(self, problem, dtype, reduction):
+ num_tokens, vocabsize, dim = problem
+ hidden_shape = (num_tokens, dim) if isinstance(num_tokens, int) else (*num_tokens, dim)
+ labels_shape = (num_tokens,) if isinstance(num_tokens, int) else num_tokens
+
+ if self.is_chief:
+ print()
+ print(
+ f"[INFO]: On problem {problem}, dtype {dtype}, reduction {reduction}, TP size {self.tp_world_size}:"
+ )
+
+ def torch_storage():
+ hidden = (
+ torch.empty(hidden_shape, dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ weight = (
+ torch.empty((vocabsize, dim), dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda")
+
+ dist.broadcast(hidden, src=0, group=self.tp_group)
+ dist.broadcast(labels, src=0, group=self.tp_group)
+
+ torch.cuda.reset_peak_memory_stats()
+ torch_logprobs = self.TorchLinearCrossEntropy.apply(
+ hidden.view(-1, dim), weight, labels, self.tp_group, reduction
+ )
+ torch.cuda.synchronize()
+ torch_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024
+ if self.is_chief:
+ print(
+ f"[INFO]: On GPU {self.tp_rank}, Torch Forward pass peak memory: {torch_max_memory:.2f} MB"
+ )
+
+ g_logprobs = torch.empty_like(torch_logprobs).uniform_(-0.1, 0.1)
+ dist.broadcast(g_logprobs, src=0, group=self.tp_group)
+
+ torch.cuda.reset_peak_memory_stats()
+ (d_hidden_torch, d_weight_torch) = torch.autograd.grad(
+ (torch_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False
+ )
+ torch.cuda.synchronize()
+ torch_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024
+ if self.is_chief:
+ print(
+ f"[INFO]: On GPU {self.tp_rank}, Torch Backward pass peak memory: {torch_max_memory:.2f} MB"
+ )
+
+ def custom_storage():
+ hidden = (
+ torch.empty(hidden_shape, dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ weight = (
+ torch.empty((vocabsize, dim), dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda")
+
+ dist.broadcast(hidden, src=0, group=self.tp_group)
+ dist.broadcast(labels, src=0, group=self.tp_group)
+
+ torch.cuda.reset_peak_memory_stats()
+ custom_logprobs = linear_cross_entropy(
+ hidden, weight, labels, tp_group=self.tp_group, reduction=reduction
+ )
+ torch.cuda.synchronize()
+ custom_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024
+ if self.is_chief:
+ print(
+ f"[INFO]: On GPU {self.tp_rank}, Custom Forward pass peak memory: {custom_max_memory:.2f} MB"
+ )
+
+ g_logprobs = torch.empty_like(custom_logprobs).uniform_(-0.1, 0.1)
+ dist.broadcast(g_logprobs, src=0, group=self.tp_group)
+
+ torch.cuda.reset_peak_memory_stats()
+ (d_hidden_custom, d_weight_custom) = torch.autograd.grad(
+ (custom_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False
+ )
+ torch.cuda.synchronize()
+ custom_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024
+ if self.is_chief:
+ print(
+ f"[INFO]: On GPU {self.tp_rank}, Custom Backward pass peak memory: {custom_max_memory:.2f} MB"
+ )
+
+ self.cleanup()
+ torch_storage()
+ self.cleanup()
+ custom_storage()
+
+
+@pytest.mark.skipif(
+ "WORLD_SIZE" not in os.environ or int(os.environ["WORLD_SIZE"]) < 2,
+ reason="Requires torchrun with multiple GPUs",
+)
+@pytest.mark.skipif(get_device_arch_version() != 10, reason="Requires GPU architecture = 10")
+@pytest.mark.usefixtures("distributed_context")
+class TestFusedLinearCrossEntropySequenceParallel:
+ @pytest.fixture(autouse=True)
+ def setup_attrs(self, distributed_context):
+ """
+ Setup attributes for the test class.
+ """
+ self.tp_group = distributed_context.group
+ self.tp_rank = distributed_context.rank
+ self.tp_world_size = distributed_context.world_size
+ self.is_chief = distributed_context.is_chief
+
+ @staticmethod
+ def timed_barrier(timeout_s=10):
+ import time
+
+ work = torch.distributed.barrier(async_op=True)
+ t0 = time.time()
+ while not work.is_completed():
+ if time.time() - t0 > timeout_s:
+ exit(1)
+ time.sleep(0.05)
+ work.wait()
+
+ def cleanup(self):
+ torch.cuda.empty_cache()
+ torch.cuda.reset_peak_memory_stats()
+ import gc
+
+ gc.collect()
+ torch.cuda.synchronize()
+
+ @staticmethod
+ def torch_linear_cross_entropy_single_gpu(
+ hidden: torch.Tensor,
+ weight: torch.Tensor,
+ labels: torch.Tensor,
+ reduction: typing.Optional[str] = "mean",
+ ):
+ logits = hidden.to(torch.float32) @ weight.T.to(torch.float32)
+ logprobs = torch.nn.functional.cross_entropy(
+ logits.view(-1, logits.shape[-1]), labels.view(-1), reduction=reduction
+ )
+ return logprobs.to(torch.float32)
+
+ class TorchLinearCrossEntropy(torch.autograd.Function):
+ @staticmethod
+ def forward(
+ ctx,
+ hidden: torch.Tensor,
+ weight: torch.Tensor,
+ labels: torch.Tensor,
+ tp_group: torch.distributed.ProcessGroup,
+ reduction: typing.Optional[str] = "mean",
+ ):
+ tp_rank = 0 if tp_group is None else torch.distributed.get_rank(tp_group)
+ tp_world_size = 1 if tp_group is None else torch.distributed.get_world_size(tp_group)
+
+ whole_hidden = torch.empty(
+ (hidden.shape[0] * tp_world_size, hidden.shape[-1]),
+ dtype=hidden.dtype,
+ device=hidden.device,
+ )
+ dist.all_gather_into_tensor(whole_hidden, hidden, group=tp_group)
+
+ logits = whole_hidden.to(torch.float32) @ weight.T.to(torch.float32)
+
+ whole_logits = torch.empty(
+ (logits.shape[0], logits.shape[-1] * tp_world_size),
+ dtype=logits.dtype,
+ device=logits.device,
+ )
+ whole_logits_ref = [
+ whole_logits[..., i * logits.shape[-1] : (i + 1) * logits.shape[-1]]
+ for i in range(tp_world_size)
+ ]
+ dist.all_gather(whole_logits_ref, logits, group=tp_group)
+
+ logprobs = torch.nn.functional.cross_entropy(
+ whole_logits.view(-1, whole_logits.shape[-1]), labels.view(-1), reduction=reduction
+ )
+
+ # If we don't preserve whole_logits,
+ # we need to re-compute it in the backward pass
+ ctx.save_for_backward(whole_hidden, weight, labels)
+ ctx.tp_group = tp_group
+ ctx.reduction = reduction
+ ctx.tp_rank = tp_rank
+ ctx.tp_world_size = tp_world_size
+
+ return logprobs.to(torch.float32)
+
+ @staticmethod
+ def backward(ctx, g_logprobs: torch.Tensor):
+ whole_hidden, weight, labels = ctx.saved_tensors
+ tp_group = ctx.tp_group
+ reduction = ctx.reduction
+ tp_rank = ctx.tp_rank
+ tp_world_size = ctx.tp_world_size
+
+ num_tokens, dim = whole_hidden.shape
+
+ if reduction == "mean":
+ _g_logprobs = torch.broadcast_to(g_logprobs / num_tokens, (num_tokens,))
+ elif reduction == "sum":
+ _g_logprobs = torch.broadcast_to(g_logprobs, (num_tokens,))
+ else:
+ _g_logprobs = g_logprobs
+
+ # re-compute whole_logits
+ logits = whole_hidden.to(torch.float32) @ weight.T.to(torch.float32)
+ whole_logits = torch.empty(
+ (logits.shape[0], logits.shape[-1] * tp_world_size),
+ dtype=logits.dtype,
+ device=logits.device,
+ )
+ whole_logits_ref = [
+ whole_logits[..., i * logits.shape[-1] : (i + 1) * logits.shape[-1]]
+ for i in range(tp_world_size)
+ ]
+ dist.all_gather(whole_logits_ref, logits, group=tp_group)
+
+ one_hot = torch.zeros_like(whole_logits)
+ one_hot.scatter_(1, labels.view(-1).unsqueeze(-1), 1)
+
+ pd = torch.nn.functional.softmax(whole_logits, dim=-1)
+ d_logits = (pd - one_hot) * _g_logprobs.unsqueeze(-1)
+ d_logits = d_logits.to(whole_hidden.dtype)
+
+ local_size = weight.size(0)
+ local_d_logits = d_logits[:, tp_rank * local_size : (tp_rank + 1) * local_size]
+
+ d_hidden = local_d_logits @ weight
+ local_d_weight = local_d_logits.T @ whole_hidden
+
+ # dist.all_reduce(
+ # local_d_hidden,
+ # op=dist.ReduceOp.SUM,
+ # group=tp_group
+ # )
+
+ # split the local_d_hidden along the sequence length dimension
+ local_num_tokens = num_tokens // tp_world_size
+ # local_d_hidden = local_d_hidden[tp_rank * local_num_tokens : (tp_rank + 1) * local_num_tokens, :]
+
+ local_d_hidden = torch.empty(
+ (local_num_tokens, dim), dtype=weight.dtype, device=weight.device
+ )
+ dist.reduce_scatter_tensor(
+ local_d_hidden, d_hidden, op=dist.ReduceOp.SUM, group=tp_group
+ )
+ return local_d_hidden, local_d_weight, None, None, None
+
+ @pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16])
+ @pytest.mark.parametrize("reduction", ["mean", "sum", "none"])
+ @pytest.mark.parametrize("problem", [(256, 129280, 8192)])
+ def test_torch_sp_vs_single_gpu(self, dtype, reduction, problem):
+ num_tokens, vocabsize, dim = problem
+ vocabsize = vocabsize // self.tp_world_size
+
+ hidden = (
+ torch.empty((num_tokens, dim), dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ weight = (
+ torch.empty((vocabsize, dim), dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ labels = torch.randint(
+ 0, vocabsize, (num_tokens * self.tp_world_size,), dtype=torch.long, device="cuda"
+ )
+
+ # ------------ forward pass ------------ #
+ dist.broadcast(labels, src=0, group=self.tp_group)
+
+ # single GPU
+ whole_hidden = torch.empty(
+ (num_tokens * self.tp_world_size, dim), dtype=dtype, device="cuda"
+ )
+ dist.all_gather_into_tensor(whole_hidden, hidden, group=self.tp_group)
+ whole_hidden = whole_hidden.clone().requires_grad_()
+
+ whole_weight = torch.empty(
+ (vocabsize * self.tp_world_size, dim), dtype=dtype, device="cuda"
+ )
+ whole_weight_view = [
+ whole_weight[i * vocabsize : (i + 1) * vocabsize, :] for i in range(self.tp_world_size)
+ ]
+ dist.all_gather(whole_weight_view, weight, group=self.tp_group)
+ whole_weight = whole_weight.clone().requires_grad_()
+ logprobs_single_gpu = self.torch_linear_cross_entropy_single_gpu(
+ whole_hidden, whole_weight, labels, reduction=reduction
+ )
+
+ # TP
+ logprobs_tp = self.TorchLinearCrossEntropy.apply(
+ hidden, weight, labels, self.tp_group, reduction
+ )
+ torch.testing.assert_close(logprobs_single_gpu, logprobs_tp)
+
+ # ------------ backward pass ------------ #
+ g_logprobs = torch.empty_like(logprobs_single_gpu).uniform_(-0.1, 0.1)
+ dist.broadcast(g_logprobs, src=0, group=self.tp_group)
+
+ # single GPU
+ (d_hidden_single_gpu, d_weight_single_gpu) = torch.autograd.grad(
+ (logprobs_single_gpu,), (whole_hidden, whole_weight), (g_logprobs,), retain_graph=False
+ )
+
+ # TP
+ (d_hidden_tp, d_weight_tp) = torch.autograd.grad(
+ (logprobs_tp,), (hidden, weight), (g_logprobs,), retain_graph=False
+ )
+
+ local_d_hidden_single_gpu = d_hidden_single_gpu[
+ self.tp_rank * hidden.shape[0] : (self.tp_rank + 1) * hidden.shape[0], :
+ ]
+ torch.testing.assert_close(local_d_hidden_single_gpu, d_hidden_tp, atol=1e-3, rtol=1e-3)
+ local_d_weight_single_gpu = d_weight_single_gpu[
+ self.tp_rank * weight.shape[0] : (self.tp_rank + 1) * weight.shape[0], :
+ ]
+ torch.testing.assert_close(local_d_weight_single_gpu, d_weight_tp, atol=1e-3, rtol=1e-3)
+
+ self.cleanup()
+
+ @staticmethod
+ def get_problems():
+ return [
+ (80, 125, 64),
+ (80, 152064, 64),
+ (1024, 152064, 4096),
+ (4096, 15206, 1024),
+ ((1, 4096), 15206, 1024),
+ ((4, 1024), 15206, 1024),
+ ]
+
+ @pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16])
+ @pytest.mark.parametrize("reduction", ["mean", "sum", "none"])
+ @pytest.mark.parametrize("problem", get_problems())
+ def test_correctness(self, dtype, reduction, problem):
+ num_tokens, vocabsize, dim = problem
+ hidden_shape = (num_tokens, dim) if isinstance(num_tokens, int) else (*num_tokens, dim)
+ labels_shape = (
+ (num_tokens * self.tp_world_size,)
+ if isinstance(num_tokens, int)
+ else (num_tokens[0] * self.tp_world_size, *num_tokens[1:])
+ )
+
+ hidden = (
+ torch.empty(hidden_shape, dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ weight = (
+ torch.empty((vocabsize, dim), dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda")
+
+ # ------ forward pass ------ #
+ dist.broadcast(labels, src=0, group=self.tp_group)
+
+ torch_logprobs = self.TorchLinearCrossEntropy.apply(
+ hidden.view(-1, dim), weight, labels, self.tp_group, reduction
+ )
+
+ custom_logprobs = linear_cross_entropy(
+ hidden,
+ weight,
+ labels,
+ tp_group=self.tp_group,
+ reduction=reduction,
+ sequence_parallel=True,
+ )
+
+ torch.testing.assert_close(torch_logprobs, custom_logprobs)
+
+ # ------- backward pass ------- #
+ g_logprobs = torch.empty_like(torch_logprobs).uniform_(-0.1, 0.1)
+ dist.broadcast(g_logprobs, src=0, group=self.tp_group)
+
+ (d_hidden_torch, d_weight_torch) = torch.autograd.grad(
+ (torch_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False
+ )
+ (d_hidden_custom, d_weight_custom) = torch.autograd.grad(
+ (custom_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False
+ )
+
+ # timed barrier so that if the asserts fail on one GPU, the other ranks do not hang
+ torch.testing.assert_close(d_hidden_torch, d_hidden_custom, atol=1e-3, rtol=1e-3)
+ torch.testing.assert_close(d_weight_torch, d_weight_custom, atol=1e-3, rtol=1e-3)
+ self.timed_barrier()
+
+ self.cleanup()
+
+ @pytest.mark.parametrize("problem", [((1, 1024), 129280, 7168)])
+ @pytest.mark.parametrize("dtype", [torch.bfloat16])
+ @pytest.mark.parametrize("reduction", ["mean"])
+ def test_performance(self, problem, dtype, reduction):
+ num_tokens, vocabsize, dim = problem
+ hidden_shape = (num_tokens, dim) if isinstance(num_tokens, int) else (*num_tokens, dim)
+ labels_shape = (
+ (num_tokens * self.tp_world_size,)
+ if isinstance(num_tokens, int)
+ else (num_tokens[0] * self.tp_world_size, *num_tokens[1:])
+ )
+
+ start_event = torch.cuda.Event(enable_timing=True)
+ end_event = torch.cuda.Event(enable_timing=True)
+
+ torch_fwd_latency = list()
+ torch_bwd_latency = list()
+ custom_fwd_latency = list()
+ custom_bwd_latency = list()
+
+ iterations = 5
+ for i in range(iterations):
+ hidden = (
+ torch.empty(hidden_shape, dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ weight = (
+ torch.empty((vocabsize, dim), dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda")
+
+ # ------ forward pass ------ #
+ dist.broadcast(labels, src=0, group=self.tp_group)
+
+ start_event.record()
+ torch_logprobs = self.TorchLinearCrossEntropy.apply(
+ hidden.view(-1, dim), weight, labels, self.tp_group, reduction
+ )
+ end_event.record()
+ torch.cuda.synchronize()
+ torch_fwd_latency.append(start_event.elapsed_time(end_event))
+
+ start_event.record()
+ custom_logprobs = linear_cross_entropy(
+ hidden,
+ weight,
+ labels,
+ tp_group=self.tp_group,
+ reduction=reduction,
+ sequence_parallel=True,
+ )
+ end_event.record()
+ torch.cuda.synchronize()
+ custom_fwd_latency.append(start_event.elapsed_time(end_event))
+
+ # ------- backward pass ------- #
+ g_logprobs = torch.empty_like(torch_logprobs).uniform_(-0.1, 0.1)
+ dist.broadcast(g_logprobs, src=0, group=self.tp_group)
+
+ start_event.record()
+ (d_hidden_torch, d_weight_torch) = torch.autograd.grad(
+ (torch_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False
+ )
+ end_event.record()
+ torch.cuda.synchronize()
+ torch_bwd_latency.append(start_event.elapsed_time(end_event))
+
+ start_event.record()
+ (d_hidden_custom, d_weight_custom) = torch.autograd.grad(
+ (custom_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False
+ )
+ end_event.record()
+ torch.cuda.synchronize()
+ custom_bwd_latency.append(start_event.elapsed_time(end_event))
+
+ # --- drop the first iteration's latency (warmup) --- #
+ torch_fwd_latency = torch_fwd_latency[1:]
+ torch_bwd_latency = torch_bwd_latency[1:]
+ custom_fwd_latency = custom_fwd_latency[1:]
+ custom_bwd_latency = custom_bwd_latency[1:]
+
+ if self.is_chief:
+ print()
+ print(
+ f"[INFO]: On problem {problem}, dtype {dtype}, reduction {reduction}, TP size {self.tp_world_size}, Sequence Parallel: True:"
+ )
+ print(
+ f"[INFO]: Torch forward latency: {sum(torch_fwd_latency) / len(torch_fwd_latency):.2f} ms"
+ )
+ print(
+ f"[INFO]: Custom forward latency: {sum(custom_fwd_latency) / len(custom_fwd_latency):.2f} ms"
+ )
+ print(
+ f"[INFO]: Torch backward latency: {sum(torch_bwd_latency) / len(torch_bwd_latency):.2f} ms"
+ )
+ print(
+ f"[INFO]: Custom backward latency: {sum(custom_bwd_latency) / len(custom_bwd_latency):.2f} ms"
+ )
+
+ @pytest.mark.parametrize("problem", [((1, 1024), 129280, 7168)])
+ @pytest.mark.parametrize("dtype", [torch.bfloat16])
+ @pytest.mark.parametrize("reduction", ["mean"])
+ def test_storage(self, problem, dtype, reduction):
+ num_tokens, vocabsize, dim = problem
+ hidden_shape = (num_tokens, dim) if isinstance(num_tokens, int) else (*num_tokens, dim)
+ labels_shape = (
+ (num_tokens * self.tp_world_size,)
+ if isinstance(num_tokens, int)
+ else (num_tokens[0] * self.tp_world_size, *num_tokens[1:])
+ )
+
+ if self.is_chief:
+ print()
+ print(
+ f"[INFO]: On problem {problem}, dtype {dtype}, reduction {reduction}, TP size {self.tp_world_size}, Sequence Parallel: True:"
+ )
+
+ def torch_storage():
+ hidden = (
+ torch.empty(hidden_shape, dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ weight = (
+ torch.empty((vocabsize, dim), dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda")
+
+ dist.broadcast(hidden, src=0, group=self.tp_group)
+ dist.broadcast(labels, src=0, group=self.tp_group)
+
+ torch.cuda.reset_peak_memory_stats()
+ torch_logprobs = self.TorchLinearCrossEntropy.apply(
+ hidden.view(-1, dim), weight, labels, self.tp_group, reduction
+ )
+ torch.cuda.synchronize()
+ torch_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024
+ if self.is_chief:
+ print(
+ f"[INFO]: On GPU {self.tp_rank}, Torch Forward pass peak memory: {torch_max_memory:.2f} MB"
+ )
+
+ g_logprobs = torch.empty_like(torch_logprobs).uniform_(-0.1, 0.1)
+ dist.broadcast(g_logprobs, src=0, group=self.tp_group)
+
+ torch.cuda.reset_peak_memory_stats()
+ (d_hidden_torch, d_weight_torch) = torch.autograd.grad(
+ (torch_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False
+ )
+ torch.cuda.synchronize()
+ torch_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024
+ if self.is_chief:
+ print(
+ f"[INFO]: On GPU {self.tp_rank}, Torch Backward pass peak memory: {torch_max_memory:.2f} MB"
+ )
+
+ def custom_storage():
+ hidden = (
+ torch.empty(hidden_shape, dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ weight = (
+ torch.empty((vocabsize, dim), dtype=dtype, device="cuda")
+ .uniform_(-0.1, 0.1)
+ .requires_grad_()
+ )
+ labels = torch.randint(0, vocabsize, labels_shape, dtype=torch.long, device="cuda")
+
+ dist.broadcast(hidden, src=0, group=self.tp_group)
+ dist.broadcast(labels, src=0, group=self.tp_group)
+
+ torch.cuda.reset_peak_memory_stats()
+ custom_logprobs = linear_cross_entropy(
+ hidden,
+ weight,
+ labels,
+ tp_group=self.tp_group,
+ reduction=reduction,
+ sequence_parallel=True,
+ )
+ torch.cuda.synchronize()
+ custom_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024
+ if self.is_chief:
+ print(
+ f"[INFO]: On GPU {self.tp_rank}, Custom Forward pass peak memory: {custom_max_memory:.2f} MB"
+ )
+
+ g_logprobs = torch.empty_like(custom_logprobs).uniform_(-0.1, 0.1)
+ dist.broadcast(g_logprobs, src=0, group=self.tp_group)
+
+ torch.cuda.reset_peak_memory_stats()
+ (d_hidden_custom, d_weight_custom) = torch.autograd.grad(
+ (custom_logprobs,), (hidden, weight), (g_logprobs,), retain_graph=False
+ )
+ torch.cuda.synchronize()
+ custom_max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024
+ if self.is_chief:
+ print(
+ f"[INFO]: On GPU {self.tp_rank}, Custom Backward pass peak memory: {custom_max_memory:.2f} MB"
+ )
+
+ self.cleanup()
+ torch_storage()
+ self.cleanup()
+ custom_storage()
diff --git a/tests/unit_tests/fusions/test_fused_mhc_kernels.py b/tests/unit_tests/fusions/test_fused_mhc_kernels.py
new file mode 100644
index 00000000000..15468df8264
--- /dev/null
+++ b/tests/unit_tests/fusions/test_fused_mhc_kernels.py
@@ -0,0 +1,564 @@
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+"""Unit tests for fused mHC kernels (cuTile) and native implementations.
+
+Each test compares the fused kernel's forward output AND backward gradients
+against a pure-PyTorch differentiable reference to catch numerical drift
+introduced by kernel fusion.
+"""
+
+import math
+from typing import Optional
+
+import pytest
+import torch
+from torch import Tensor
+
+from megatron.core.fusions.fused_mhc_kernels import is_cutile_available
+from megatron.core.transformer.hyper_connection import (
+ native_h_aggregate,
+ native_h_post_bda,
+ native_proj_rms,
+ native_sinkhorn,
+)
+
+_require_cutile = pytest.mark.skipif(not is_cutile_available(), reason="cuTile not installed")
+
+
+@pytest.fixture(autouse=True)
+def _skip_without_cuda():
+ if not torch.cuda.is_available():
+ pytest.skip("CUDA not available")
+
+
+DTYPE = torch.bfloat16
+DEVICE = "cuda"
+FWD_ATOL, FWD_RTOL = 2e-2, 2e-2
+BWD_ATOL, BWD_RTOL = 5e-2, 5e-2
+RAND_LO, RAND_HI = -0.1, 0.1
+COSINE_SIM_THRESH = 0.999
+
+
+def _assert_cosine_similar(a: Tensor, b: Tensor, threshold: float, msg: str = ""):
+ """Assert that flattened tensors have cosine similarity >= threshold."""
+ a_flat = a.flatten().float()
+ b_flat = b.flatten().float()
+ sim = torch.nn.functional.cosine_similarity(a_flat.unsqueeze(0), b_flat.unsqueeze(0)).item()
+ assert sim >= threshold, (
+ f"{msg}: cosine similarity {sim:.6f} < {threshold} "
+ f"(max_abs_diff={torch.max(torch.abs(a_flat - b_flat)):.6e})"
+ )
+
+
+def _rand(*shape, **kwargs):
+ """Uniform in [RAND_LO, RAND_HI] to keep magnitudes small for bf16 stability."""
+ return torch.empty(*shape, dtype=DTYPE, device=DEVICE, **kwargs).uniform_(RAND_LO, RAND_HI)
+
+
+def _info():
+ backend = "cuTile" if is_cutile_available() else "native"
+ print(f"\n [backend: {backend}]")
+
+
+# ============================================================================
+# Pure-PyTorch differentiable references (used by both fwd AND bwd tests)
+# ============================================================================
+
+
+def _ref_sinkhorn(logits: Tensor, num_iters: int, eps: float = 1e-6) -> Tensor:
+ row_max = logits.max(dim=-1, keepdim=True).values
+ M = torch.exp(logits - row_max)
+ for _ in range(num_iters):
+ M = M / M.sum(dim=-1, keepdim=True).clamp(min=eps)
+ M = M / M.sum(dim=-2, keepdim=True).clamp(min=eps)
+ return M
+
+
+def _ref_h_aggregate(x: Tensor, h_pre: Tensor) -> Tensor:
+ return (x * h_pre.unsqueeze(-1)).sum(dim=2)
+
+
+def _ref_h_post_bda(
+ h_res: Tensor, orig_res: Tensor, h_post: Tensor, x: Tensor, bias: Optional[Tensor]
+) -> Tensor:
+ s, b, n, C = orig_res.shape
+ mixed = torch.bmm(h_res.view(s * b, n, n), orig_res.view(s * b, n, C)).view(s, b, n, C)
+ x_exp = h_post.unsqueeze(-1) * x.unsqueeze(2)
+ out = x_exp + mixed
+ if bias is not None:
+ out = out + h_post.unsqueeze(-1) * bias.view(1, 1, 1, C)
+ return out
+
+
+def _ref_proj_rms(x: Tensor, weight: Tensor, eps: float = 1e-6):
+ proj = torch.matmul(x, weight.t())
+ norm = x.norm(dim=-1, keepdim=True)
+ K = x.shape[-1]
+ r = 1.0 / (norm / math.sqrt(K) + eps)
+ return proj, r
+
+
+# ============================================================================
+# Sinkhorn
+# ============================================================================
+
+
+class TestNativeSinkhorn:
+ """Tests for the native SinkhornKnopp implementation."""
+
+ @pytest.mark.parametrize("s,b,n,iters", [(2, 4, 4, 5), (1, 1, 2, 10)])
+ def test_fwd_bwd_vs_torch_reference(self, s, b, n, iters):
+ """native_sinkhorn fwd output and bwd grad must match the inline PyTorch reference."""
+ _info()
+ eps = 1e-6
+ data = _rand(s, b, n, n)
+ grad_out = _rand(s, b, n, n)
+
+ # -- native_sinkhorn path (autograd.Function) --
+ inp_f = data.clone().requires_grad_(True)
+ out_f = native_sinkhorn(inp_f, iters, eps)
+ out_f.backward(grad_out)
+ grad_f = inp_f.grad.clone()
+
+ # -- inline torch reference (fully differentiable) --
+ inp_r = data.clone().requires_grad_(True)
+ out_r = _ref_sinkhorn(inp_r, iters, eps)
+ out_r.backward(grad_out)
+ grad_r = inp_r.grad.clone()
+
+ torch.testing.assert_close(out_f, out_r, atol=FWD_ATOL, rtol=FWD_RTOL)
+ torch.testing.assert_close(grad_f, grad_r, atol=BWD_ATOL, rtol=BWD_RTOL)
+
+
+class TestFusedSinkhorn:
+ @_require_cutile
+ @pytest.mark.parametrize("s,b,n,iters", [(2, 4, 4, 5), (1, 1, 2, 10)])
+ def test_fwd_bwd_vs_reference(self, s, b, n, iters):
+ """E2E: fused cuTile fwd output and bwd grad must match the PyTorch reference."""
+ from megatron.core.fusions.fused_mhc_kernels import fused_sinkhorn
+
+ _info()
+ eps = 1e-6
+ data = _rand(s, b, n, n)
+ grad_out = _rand(s, b, n, n)
+
+ # -- fused path --
+ inp_f = data.clone().requires_grad_(True)
+ out_f = fused_sinkhorn(inp_f, iters, eps)
+ out_f.backward(grad_out)
+ grad_f = inp_f.grad.clone()
+
+ # -- reference path (fully differentiable) --
+ inp_r = data.clone().requires_grad_(True)
+ out_r = _ref_sinkhorn(inp_r, iters, eps)
+ out_r.backward(grad_out)
+ grad_r = inp_r.grad.clone()
+
+ torch.testing.assert_close(out_f, out_r, atol=FWD_ATOL, rtol=FWD_RTOL)
+ torch.testing.assert_close(grad_f, grad_r, atol=BWD_ATOL, rtol=BWD_RTOL)
+
+
+# ============================================================================
+# H_aggregate
+# ============================================================================
+
+
+class TestNativeHAggregate:
+ """Tests for native_h_aggregate."""
+
+ @pytest.mark.parametrize("s,b,n,C", [(2, 4, 4, 1024), (1, 1, 2, 256)])
+ def test_fwd_bwd_vs_torch_reference(self, s, b, n, C):
+ _info()
+ x_data = _rand(s, b, n, C)
+ h_data = _rand(s, b, n)
+ grad_out = _rand(s, b, C)
+
+ xf = x_data.clone().requires_grad_(True)
+ hf = h_data.clone().requires_grad_(True)
+ of = native_h_aggregate(xf, hf)
+ of.backward(grad_out)
+
+ xr = x_data.clone().requires_grad_(True)
+ hr = h_data.clone().requires_grad_(True)
+ oref = _ref_h_aggregate(xr, hr)
+ oref.backward(grad_out)
+
+ torch.testing.assert_close(of, oref, atol=FWD_ATOL, rtol=FWD_RTOL)
+ torch.testing.assert_close(xf.grad, xr.grad, atol=BWD_ATOL, rtol=BWD_RTOL)
+ torch.testing.assert_close(hf.grad, hr.grad, atol=BWD_ATOL, rtol=BWD_RTOL)
+
+
+class TestFusedHAggregate:
+ @_require_cutile
+ @pytest.mark.parametrize("s,b,n,C", [(2, 4, 4, 1024), (1, 1, 2, 256)])
+ def test_fwd_bwd_vs_reference(self, s, b, n, C):
+ """E2E: fused cuTile fwd output and bwd grads must match the PyTorch reference."""
+ from megatron.core.fusions.fused_mhc_kernels import fused_h_aggregate
+
+ _info()
+ x_data = _rand(s, b, n, C)
+ h_data = _rand(s, b, n)
+ grad_out = _rand(s, b, C)
+
+ # -- fused path --
+ xf = x_data.clone().requires_grad_(True)
+ hf = h_data.clone().requires_grad_(True)
+ of = fused_h_aggregate(xf, hf)
+ of.backward(grad_out)
+
+ # -- reference path --
+ xr = x_data.clone().requires_grad_(True)
+ hr = h_data.clone().requires_grad_(True)
+ oref = _ref_h_aggregate(xr, hr)
+ oref.backward(grad_out)
+
+ torch.testing.assert_close(of, oref, atol=FWD_ATOL, rtol=FWD_RTOL)
+ torch.testing.assert_close(xf.grad, xr.grad, atol=BWD_ATOL, rtol=BWD_RTOL)
+ torch.testing.assert_close(hf.grad, hr.grad, atol=BWD_ATOL, rtol=BWD_RTOL)
+
+
+# ============================================================================
+# H_post BDA
+# ============================================================================
+
+
+class TestNativeHPostBDA:
+ """Tests for native_h_post_bda."""
+
+ @pytest.mark.parametrize("with_bias", [True, False])
+ @pytest.mark.parametrize("s,b,n,C", [(2, 4, 4, 1024), (1, 2, 2, 256)])
+ def test_fwd_bwd_vs_torch_reference(self, s, b, n, C, with_bias):
+ _info()
+ hr_data = _rand(s, b, n, n)
+ orig_data = _rand(s, b, n, C)
+ hp_data = _rand(s, b, n)
+ x_data = _rand(s, b, C)
+ bias_data = _rand(C) if with_bias else None
+ grad_out = _rand(s, b, n, C)
+
+ def _make_inputs():
+ hr = hr_data.clone().requires_grad_(True)
+ orig = orig_data.clone().requires_grad_(True)
+ hp = hp_data.clone().requires_grad_(True)
+ x = x_data.clone().requires_grad_(True)
+ bi = bias_data.clone().requires_grad_(True) if with_bias else None
+ return hr, orig, hp, x, bi
+
+ hr_f, orig_f, hp_f, x_f, bi_f = _make_inputs()
+ out_f = native_h_post_bda(hr_f, orig_f, hp_f, x_f, bi_f)
+ out_f.backward(grad_out)
+
+ hr_r, orig_r, hp_r, x_r, bi_r = _make_inputs()
+ out_r = _ref_h_post_bda(hr_r, orig_r, hp_r, x_r, bi_r)
+ out_r.backward(grad_out)
+
+ torch.testing.assert_close(out_f, out_r, atol=FWD_ATOL, rtol=FWD_RTOL)
+ for name, gf, gr in [
+ ("h_res", hr_f.grad, hr_r.grad),
+ ("orig_res", orig_f.grad, orig_r.grad),
+ ("h_post", hp_f.grad, hp_r.grad),
+ ("x", x_f.grad, x_r.grad),
+ ]:
+ torch.testing.assert_close(
+ gf, gr, atol=BWD_ATOL, rtol=BWD_RTOL, msg=f"backward mismatch on {name}"
+ )
+ if with_bias:
+ torch.testing.assert_close(
+ bi_f.grad, bi_r.grad, atol=BWD_ATOL, rtol=BWD_RTOL, msg="backward mismatch on bias"
+ )
+
+
+class TestFusedHPostBDA:
+ @_require_cutile
+ @pytest.mark.parametrize("with_bias", [True, False])
+ @pytest.mark.parametrize("s,b,n,C", [(2, 4, 4, 1024), (1, 2, 2, 256)])
+ def test_fwd_bwd_vs_reference(self, s, b, n, C, with_bias):
+ """E2E: fused cuTile fwd output and bwd grads must match the PyTorch reference."""
+ from megatron.core.fusions.fused_mhc_kernels import fused_h_post_bda
+
+ _info()
+ hr_data = _rand(s, b, n, n)
+ orig_data = _rand(s, b, n, C)
+ hp_data = _rand(s, b, n)
+ x_data = _rand(s, b, C)
+ bias_data = _rand(C) if with_bias else None
+ grad_out = _rand(s, b, n, C)
+
+ def _make_inputs():
+ hr = hr_data.clone().requires_grad_(True)
+ orig = orig_data.clone().requires_grad_(True)
+ hp = hp_data.clone().requires_grad_(True)
+ x = x_data.clone().requires_grad_(True)
+ bi = bias_data.clone().requires_grad_(True) if with_bias else None
+ return hr, orig, hp, x, bi
+
+ # -- fused path --
+ hr_f, orig_f, hp_f, x_f, bi_f = _make_inputs()
+ out_f = fused_h_post_bda(hr_f, orig_f, hp_f, x_f, bi_f)
+ out_f.backward(grad_out)
+
+ # -- reference path --
+ hr_r, orig_r, hp_r, x_r, bi_r = _make_inputs()
+ out_r = _ref_h_post_bda(hr_r, orig_r, hp_r, x_r, bi_r)
+ out_r.backward(grad_out)
+
+ torch.testing.assert_close(out_f, out_r, atol=FWD_ATOL, rtol=FWD_RTOL)
+ for name, gf, gr in [
+ ("h_res", hr_f.grad, hr_r.grad),
+ ("orig_res", orig_f.grad, orig_r.grad),
+ ("h_post", hp_f.grad, hp_r.grad),
+ ("x", x_f.grad, x_r.grad),
+ ]:
+ torch.testing.assert_close(
+ gf, gr, atol=BWD_ATOL, rtol=BWD_RTOL, msg=f"backward mismatch on {name}"
+ )
+ if with_bias:
+ torch.testing.assert_close(
+ bi_f.grad, bi_r.grad, atol=BWD_ATOL, rtol=BWD_RTOL, msg="backward mismatch on bias"
+ )
+
+
+# ============================================================================
+# Proj RMS
+# ============================================================================
+
+
+class TestNativeProjRms:
+ """Tests for native_proj_rms."""
+
+ @pytest.mark.parametrize("M,N,K", [(256, 20, 4096), (64, 8, 512)])
+ def test_fwd_bwd_vs_torch_reference(self, M, N, K):
+ _info()
+ eps = 1e-6
+ x_data = _rand(M, K)
+ w_data = _rand(N, K)
+ grad_proj = _rand(M, N)
+ grad_r = _rand(M, 1)
+
+ xf = x_data.clone().requires_grad_(True)
+ wf = w_data.clone().requires_grad_(True)
+ proj_f, r_f = native_proj_rms(xf, wf, eps)
+ (proj_f * grad_proj + r_f * grad_r).sum().backward()
+
+ xr = x_data.clone().requires_grad_(True)
+ wr = w_data.clone().requires_grad_(True)
+ proj_r, r_r = _ref_proj_rms(xr, wr, eps)
+ (proj_r * grad_proj + r_r * grad_r).sum().backward()
+
+ torch.testing.assert_close(proj_f, proj_r, atol=FWD_ATOL, rtol=FWD_RTOL)
+ torch.testing.assert_close(r_f, r_r, atol=FWD_ATOL, rtol=FWD_RTOL)
+ torch.testing.assert_close(
+ xf.grad, xr.grad, atol=BWD_ATOL, rtol=BWD_RTOL, msg="backward mismatch on x"
+ )
+ torch.testing.assert_close(
+ wf.grad, wr.grad, atol=BWD_ATOL, rtol=BWD_RTOL, msg="backward mismatch on weight"
+ )
+
+
+class TestFusedProjRms:
+ @_require_cutile
+ @pytest.mark.parametrize("M,N,K", [(256, 20, 4096), (64, 8, 512)])
+ def test_fwd_bwd_vs_reference(self, M, N, K):
+ """E2E: fused cuTile fwd output and bwd grads must match the PyTorch reference."""
+ from megatron.core.fusions.fused_mhc_kernels import fused_proj_rms
+
+ _info()
+ eps = 1e-6
+ x_data = _rand(M, K)
+ w_data = _rand(N, K)
+ grad_proj = _rand(M, N)
+ grad_r = _rand(M, 1)
+
+ # -- fused path --
+ xf = x_data.clone().requires_grad_(True)
+ wf = w_data.clone().requires_grad_(True)
+ proj_f, r_f = fused_proj_rms(xf, wf, eps)
+ (proj_f * grad_proj + r_f * grad_r).sum().backward()
+
+ # -- reference path --
+ xr = x_data.clone().requires_grad_(True)
+ wr = w_data.clone().requires_grad_(True)
+ proj_r, r_r = _ref_proj_rms(xr, wr, eps)
+ (proj_r * grad_proj + r_r * grad_r).sum().backward()
+
+ torch.testing.assert_close(proj_f, proj_r, atol=FWD_ATOL, rtol=FWD_RTOL)
+ torch.testing.assert_close(r_f, r_r, atol=FWD_ATOL, rtol=FWD_RTOL)
+ torch.testing.assert_close(
+ xf.grad, xr.grad, atol=BWD_ATOL, rtol=BWD_RTOL, msg="backward mismatch on x"
+ )
+ torch.testing.assert_close(
+ wf.grad, wr.grad, atol=BWD_ATOL, rtol=BWD_RTOL, msg="backward mismatch on weight"
+ )
+
+
+# ============================================================================
+# End-to-end pipeline (all four kernels chained)
+# ============================================================================
+
+
+class TestEndToEndNative:
+ """Full mHC pipeline using native modules.
+
+ proj_rms -> compute_h -> sinkhorn -> aggregate -> h_post_bda.
+ Compares the native modules against inline PyTorch reference.
+ """
+
+ def test_full_pipeline_fwd_bwd(self):
+ _info()
+ s, b, n, C = 2, 4, 4, 1024
+ eps = 1e-6
+ sinkhorn_iters = 5
+
+ hs_data = _rand(s, b, n * C)
+ w_data = _rand(n * n + 2 * n, n * C)
+ layer_out_data = _rand(s, b, C)
+ layer_bias_data = _rand(C)
+
+ def _run_native_modules():
+ hs = hs_data.clone().requires_grad_(True)
+ w = w_data.clone().requires_grad_(True)
+
+ x_2d = hs.reshape(s * b, n * C)
+ proj, r = native_proj_rms(x_2d, w, eps)
+ proj = proj.view(s, b, -1)
+ r = r.view(s, b, 1)
+
+ h = r * proj
+ h_pre = h[..., :n].sigmoid()
+ h_post = h[..., n : 2 * n].sigmoid() * 2
+ h_res_logits = h[..., 2 * n :]
+ h_res = native_sinkhorn(h_res_logits.view(s, b, n, n), sinkhorn_iters, eps)
+
+ aggregated = native_h_aggregate(hs.view(s, b, n, C), h_pre)
+
+ output = native_h_post_bda(
+ h_res, hs.view(s, b, n, C), h_post, layer_out_data, layer_bias_data
+ )
+
+ loss = output.sum() + aggregated.sum()
+ loss.backward()
+ return output.detach(), aggregated.detach(), hs.grad.clone()
+
+ def _run_inline_ref():
+ hs = hs_data.clone().requires_grad_(True)
+ w = w_data.clone().requires_grad_(True)
+
+ x_2d = hs.reshape(s * b, n * C)
+ proj, r = _ref_proj_rms(x_2d, w, eps)
+ proj = proj.view(s, b, -1)
+ r = r.view(s, b, 1)
+
+ h = r * proj
+ h_pre = h[..., :n].sigmoid()
+ h_post = h[..., n : 2 * n].sigmoid() * 2
+ h_res_logits = h[..., 2 * n :]
+ h_res = _ref_sinkhorn(h_res_logits.view(s, b, n, n), sinkhorn_iters, eps)
+
+ aggregated = _ref_h_aggregate(hs.view(s, b, n, C), h_pre)
+
+ output = _ref_h_post_bda(
+ h_res, hs.view(s, b, n, C), h_post, layer_out_data, layer_bias_data
+ )
+
+ loss = output.sum() + aggregated.sum()
+ loss.backward()
+ return output.detach(), aggregated.detach(), hs.grad.clone()
+
+ out_m, agg_m, grad_m = _run_native_modules()
+ out_r, agg_r, grad_r = _run_inline_ref()
+
+ torch.testing.assert_close(
+ agg_m, agg_r, atol=FWD_ATOL, rtol=FWD_RTOL, msg="aggregated output mismatch"
+ )
+ torch.testing.assert_close(
+ out_m, out_r, atol=FWD_ATOL, rtol=FWD_RTOL, msg="h_post_bda output mismatch"
+ )
+ _assert_cosine_similar(
+ grad_m, grad_r, COSINE_SIM_THRESH, msg="hidden_states grad (E2E backward)"
+ )
+
+
+class TestEndToEndFused:
+ """Full mHC pipeline using fused cuTile kernels (requires cuTile)."""
+
+ @_require_cutile
+ def test_full_pipeline_fwd_bwd(self):
+ from megatron.core.fusions.fused_mhc_kernels import (
+ fused_h_aggregate,
+ fused_h_post_bda,
+ fused_proj_rms,
+ fused_sinkhorn,
+ )
+
+ _info()
+ s, b, n, C = 2, 4, 4, 1024
+ eps = 1e-6
+ sinkhorn_iters = 5
+
+ hs_data = _rand(s, b, n * C)
+ w_data = _rand(n * n + 2 * n, n * C)
+ layer_out_data = _rand(s, b, C)
+ layer_bias_data = _rand(C)
+
+ def _run_fused():
+ hs = hs_data.clone().requires_grad_(True)
+ w = w_data.clone().requires_grad_(True)
+
+ x_2d = hs.reshape(s * b, n * C)
+ proj, r = fused_proj_rms(x_2d, w, eps)
+ proj = proj.view(s, b, -1)
+ r = r.view(s, b, 1)
+
+ h = r * proj
+ h_pre = h[..., :n].sigmoid()
+ h_post = h[..., n : 2 * n].sigmoid() * 2
+ h_res_logits = h[..., 2 * n :]
+ h_res = fused_sinkhorn(h_res_logits.view(s, b, n, n), sinkhorn_iters, eps)
+
+ aggregated = fused_h_aggregate(hs.view(s, b, n, C), h_pre)
+
+ output = fused_h_post_bda(
+ h_res, hs.view(s, b, n, C), h_post, layer_out_data, layer_bias_data
+ )
+
+ loss = output.sum() + aggregated.sum()
+ loss.backward()
+ return output.detach(), aggregated.detach(), hs.grad.clone()
+
+ def _run_ref():
+ hs = hs_data.clone().requires_grad_(True)
+ w = w_data.clone().requires_grad_(True)
+
+ x_2d = hs.reshape(s * b, n * C)
+ proj, r = _ref_proj_rms(x_2d, w, eps)
+ proj = proj.view(s, b, -1)
+ r = r.view(s, b, 1)
+
+ h = r * proj
+ h_pre = h[..., :n].sigmoid()
+ h_post = h[..., n : 2 * n].sigmoid() * 2
+ h_res_logits = h[..., 2 * n :]
+ h_res = _ref_sinkhorn(h_res_logits.view(s, b, n, n), sinkhorn_iters, eps)
+
+ aggregated = _ref_h_aggregate(hs.view(s, b, n, C), h_pre)
+
+ output = _ref_h_post_bda(
+ h_res, hs.view(s, b, n, C), h_post, layer_out_data, layer_bias_data
+ )
+
+ loss = output.sum() + aggregated.sum()
+ loss.backward()
+ return output.detach(), aggregated.detach(), hs.grad.clone()
+
+ out_f, agg_f, grad_f = _run_fused()
+ out_r, agg_r, grad_r = _run_ref()
+
+ torch.testing.assert_close(
+ agg_f, agg_r, atol=FWD_ATOL, rtol=FWD_RTOL, msg="aggregated output mismatch"
+ )
+ torch.testing.assert_close(
+ out_f, out_r, atol=FWD_ATOL, rtol=FWD_RTOL, msg="h_post_bda output mismatch"
+ )
+ _assert_cosine_similar(
+ grad_f, grad_r, COSINE_SIM_THRESH, msg="hidden_states grad (E2E backward)"
+ )
diff --git a/tests/unit_tests/fusions/test_mla_yarn_rope_apply.py b/tests/unit_tests/fusions/test_mla_yarn_rope_apply.py
index 1c8976bfcb6..1a0c19d5222 100644
--- a/tests/unit_tests/fusions/test_mla_yarn_rope_apply.py
+++ b/tests/unit_tests/fusions/test_mla_yarn_rope_apply.py
@@ -1,12 +1,18 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+import warnings
+from unittest.mock import MagicMock, patch
+
import pytest
import torch
from megatron.core.models.common.embeddings import apply_rotary_pos_emb
+from megatron.core.models.common.embeddings import rope_utils as rope_utils_module
from megatron.core.models.common.embeddings.yarn_rotary_pos_embedding import YarnRotaryEmbedding
+from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.utils import is_torch_min_version
+from tests.unit_tests.test_utilities import Utils
try:
from megatron.core.fusions.fused_mla_yarn_rope_apply import (
@@ -91,7 +97,13 @@ def _test_fused_apply_mla_rope_for_q(input_format):
no_pe, pe = torch.split(pytorch_fwd_input, [q_dim, emb_dim], dim=-1)
pe_output = apply_rotary_pos_emb(
- pe, freqs, transformer_config, cu_seqlens=cu_seqlens, mscale=mscale, cp_group=FakeCPGroup()
+ pe,
+ freqs,
+ transformer_config,
+ cu_seqlens=cu_seqlens,
+ mscale=mscale,
+ cp_group=FakeCPGroup(),
+ mla_rotary_interleaved=True,
)
pytorch_output = torch.concat([no_pe, pe_output], dim=-1)
pytorch_output.backward(pytorch_bwd_input, retain_graph=True)
@@ -190,6 +202,7 @@ def _test_fused_apply_mla_rope_for_kv(input_format):
cu_seqlens=cu_seqlens,
mscale=mscale,
cp_group=FakeCPGroup(),
+ mla_rotary_interleaved=True,
)
if input_format == "sbhd":
pe_output = pe_output.expand(-1, -1, num_heads, -1)
@@ -253,3 +266,59 @@ def test_forward_backward_for_q(self, input_format):
def test_forward_backward_for_kv(self, input_format):
_test_fused_apply_mla_rope_for_kv(input_format)
+
+
+class TestApplyRotaryPosEmbMlaFusionConflict:
+ """Test apply_rotary_pos_emb: mla_rotary_interleaved vs apply_rope_fusion conflict."""
+
+ def setup_method(self):
+ Utils.initialize_model_parallel(1, 1)
+ model_parallel_cuda_manual_seed(123)
+ self.seq_len = 16
+ self.num_heads = 2
+ self.kv_channels = 32
+ self.rot_dim = self.kv_channels
+
+ def teardown_method(self):
+ Utils.destroy_model_parallel()
+
+ @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+ def test_mla_rotary_interleaved_with_apply_rope_fusion_emits_warning_and_uses_unfused(self):
+ """When apply_rope_fusion=True and mla_rotary_interleaved=True, expect warning and unfused path."""
+ config = TransformerConfig(
+ num_attention_heads=self.num_heads,
+ num_layers=1,
+ apply_rope_fusion=True,
+ rotary_interleaved=False,
+ )
+ t = torch.randn(
+ self.seq_len, 1, self.num_heads, self.kv_channels, device="cuda", dtype=torch.float32
+ )
+ freqs = torch.randn(self.seq_len, 1, 1, self.rot_dim, device="cuda", dtype=torch.float32)
+
+ fused_mock = MagicMock(return_value=t.clone())
+ with (
+ patch.object(rope_utils_module, "fused_apply_rotary_pos_emb", fused_mock),
+ patch.object(
+ rope_utils_module,
+ "_apply_rotary_pos_emb_bshd",
+ wraps=rope_utils_module._apply_rotary_pos_emb_bshd,
+ ) as unfused_spy,
+ ):
+ with warnings.catch_warnings(record=True) as w:
+ warnings.simplefilter("always")
+ out = apply_rotary_pos_emb(t, freqs, config, mla_rotary_interleaved=True)
+ # Should have warned about MLA + fusion conflict
+ mla_fusion_warnings = [
+ x for x in w if "apply_rope_fusion does not support MLA-style" in str(x.message)
+ ]
+ assert (
+ len(mla_fusion_warnings) >= 1
+ ), "Expected warning when mla_rotary_interleaved and apply_rope_fusion both enabled"
+ # Fused kernel must not be used
+ fused_mock.assert_not_called()
+ # Unfused path must have been used
+ unfused_spy.assert_called_once()
+ call_kw = unfused_spy.call_args[1]
+ assert call_kw["mla_rotary_interleaved"] is True
+ assert out.shape == t.shape
diff --git a/tests/unit_tests/fusions/test_weighted_squared_relu_fusion.py b/tests/unit_tests/fusions/test_weighted_squared_relu_fusion.py
index 85755ac1de7..58907a39b7f 100644
--- a/tests/unit_tests/fusions/test_weighted_squared_relu_fusion.py
+++ b/tests/unit_tests/fusions/test_weighted_squared_relu_fusion.py
@@ -13,7 +13,7 @@
def test_weighted_squared_relu_fusion(input_dtype):
# Tolerances depend on dtype precision
if input_dtype == torch.float32:
- tols = dict(rtol=1.0e-6, atol=1.0e-6)
+ tols = dict(rtol=1.0e-5, atol=1.0e-5)
elif input_dtype == torch.bfloat16:
tols = dict(rtol=2.0e-2, atol=1.0e-3)
else:
diff --git a/tests/unit_tests/models/test_experimental_attention_variant_module_specs.py b/tests/unit_tests/models/test_experimental_attention_variant_module_specs.py
new file mode 100644
index 00000000000..e3a589f1b97
--- /dev/null
+++ b/tests/unit_tests/models/test_experimental_attention_variant_module_specs.py
@@ -0,0 +1,660 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from megatron.core.transformer.enums import AttnMaskType, LayerType
+from megatron.core.transformer.identity_op import IdentityOp
+from megatron.core.transformer.spec_utils import ModuleSpec
+from megatron.core.transformer.transformer_block import TransformerBlockSubmodules
+from megatron.core.transformer.transformer_layer import (
+ HyperConnectionTransformerLayer,
+ TransformerLayer,
+)
+
+# ---------------------------------------------------------------------------
+# Helpers: fake backend and config builders
+# ---------------------------------------------------------------------------
+
+
+class _FakeLinear:
+ pass
+
+
+class _FakeColumnParallelLinear:
+ pass
+
+
+class _FakeRowParallelLinear:
+ pass
+
+
+class _FakeLayerNormColumnParallelLinear:
+ pass
+
+
+class _FakeLayerNorm:
+ pass
+
+
+class _FakeQKNorm:
+ pass
+
+
+class _FakeCoreAttention:
+ pass
+
+
+def _make_backend(fuse_layernorm=True):
+ """Return a mock BackendSpecProvider with deterministic return values."""
+ backend = MagicMock()
+ backend.linear.return_value = _FakeLinear
+ backend.column_parallel_linear.return_value = _FakeColumnParallelLinear
+ backend.row_parallel_linear.return_value = _FakeRowParallelLinear
+ backend.column_parallel_layer_norm_linear.return_value = _FakeLayerNormColumnParallelLinear
+ backend.fuse_layernorm_and_linear.return_value = fuse_layernorm
+ backend.core_attention.return_value = _FakeCoreAttention
+
+ def _layer_norm(rms_norm=False, for_qk=False):
+ return _FakeQKNorm if for_qk else _FakeLayerNorm
+
+ backend.layer_norm.side_effect = _layer_norm
+ return backend
+
+
+def _make_config(**overrides):
+ """Return a mock TransformerConfig with sane defaults."""
+ defaults = dict(
+ num_layers=4,
+ normalization="RMSNorm",
+ qk_layernorm=False,
+ multi_latent_attention=False,
+ qk_l2_norm=False,
+ transformer_impl="transformer_engine",
+ use_kitchen=False,
+ experimental_attention_variant=None,
+ linear_attention_freq=None,
+ moe_layer_freq=1,
+ num_moe_experts=None,
+ moe_grouped_gemm=False,
+ moe_use_legacy_grouped_gemm=False,
+ use_te_activation_func=False,
+ pipeline_model_parallel_size=1,
+ pipeline_model_parallel_layout=None,
+ use_kitchen_attention=False,
+ kitchen_attention_backend="sdpa",
+ fallback_to_eager_attn=False,
+ enable_hyper_connections=False,
+ )
+ defaults.update(overrides)
+ cfg = MagicMock()
+ for k, v in defaults.items():
+ setattr(cfg, k, v)
+ return cfg
+
+
+# ===================================================================
+# Tests for is_linear_attention_variant
+# ===================================================================
+
+
+class TestIsLinearAttentionVariant:
+ @staticmethod
+ def _fn(variant):
+ from megatron.core.models.gpt.experimental_attention_variant_module_specs import (
+ is_linear_attention_variant,
+ )
+
+ return is_linear_attention_variant(variant)
+
+ @pytest.mark.parametrize(
+ "variant, expected",
+ [("gated_delta_net", True), ("dsa", False), (None, False), ("some_unknown_variant", False)],
+ )
+ def test_variants(self, variant, expected):
+ """Validate linear-attention variant classification across supported and unsupported names."""
+ assert self._fn(variant) is expected
+
+
+# ===================================================================
+# Tests for get_moe_layer_pattern
+# ===================================================================
+
+
+class TestGetMoeLayerPattern:
+ @staticmethod
+ def _fn(config):
+ from megatron.core.models.gpt.experimental_attention_variant_module_specs import (
+ get_moe_layer_pattern,
+ )
+
+ return get_moe_layer_pattern(config)
+
+ @pytest.mark.parametrize(
+ "num_layers, freq, expected",
+ [(4, 1, [1, 1, 1, 1]), (6, 2, [1, 0, 1, 0, 1, 0]), (6, 3, [1, 0, 0, 1, 0, 0])],
+ )
+ def test_int_freq(self, num_layers, freq, expected):
+ """Verify integer moe_layer_freq is expanded into the expected per-layer MoE pattern."""
+ cfg = _make_config(num_layers=num_layers, moe_layer_freq=freq)
+ assert self._fn(cfg) == expected
+
+ def test_list_freq(self):
+ """Verify an explicit list pattern is used as-is."""
+ pattern = [1, 0, 1, 0]
+ cfg = _make_config(num_layers=4, moe_layer_freq=pattern)
+ assert self._fn(cfg) == pattern
+
+ def test_list_freq_wrong_length_raises(self):
+ """Verify a list with mismatched length fails fast."""
+ cfg = _make_config(num_layers=4, moe_layer_freq=[1, 0])
+ with pytest.raises(AssertionError, match="Invalid length"):
+ self._fn(cfg)
+
+ def test_invalid_type_raises(self):
+ """Verify unsupported moe_layer_freq types raise ValueError."""
+ cfg = _make_config(num_layers=4, moe_layer_freq="bad")
+ with pytest.raises(ValueError, match="Invalid moe_layer_freq"):
+ self._fn(cfg)
+
+
+# ===================================================================
+# Tests for get_linear_attention_pattern
+# ===================================================================
+
+
+class TestGetLinearAttentionPattern:
+ @staticmethod
+ def _fn(config):
+ from megatron.core.models.gpt.experimental_attention_variant_module_specs import (
+ get_linear_attention_pattern,
+ )
+
+ return get_linear_attention_pattern(config)
+
+ @pytest.mark.parametrize(
+ "num_layers, freq, expected",
+ [
+ # Every 4th layer (1-indexed) is SDPA (0), the rest are LA (1)
+ (8, 4, [1, 1, 1, 0, 1, 1, 1, 0]),
+ (4, 2, [1, 0, 1, 0]),
+ (3, 1, [0, 0, 0]),
+ ],
+ )
+ def test_int_freq(self, num_layers, freq, expected):
+ """Verify integer linear_attention_freq is expanded into the expected LA/SDPA pattern."""
+ cfg = _make_config(num_layers=num_layers, linear_attention_freq=freq)
+ assert self._fn(cfg) == expected
+
+ def test_list_freq(self):
+ """Verify an explicit linear-attention pattern list is used directly."""
+ pattern = [1, 0, 1, 0]
+ cfg = _make_config(num_layers=4, linear_attention_freq=pattern)
+ assert self._fn(cfg) == pattern
+
+ def test_list_freq_wrong_length_raises(self):
+ """Verify list length validation for linear_attention_freq."""
+ cfg = _make_config(num_layers=4, linear_attention_freq=[1, 0, 1])
+ with pytest.raises(AssertionError, match="Invalid length"):
+ self._fn(cfg)
+
+ def test_none_for_non_linear_variant(self):
+ """Verify non-linear variants default to all-standard attention when freq is None."""
+ cfg = _make_config(
+ num_layers=4, linear_attention_freq=None, experimental_attention_variant="dsa"
+ )
+ assert self._fn(cfg) == [0, 0, 0, 0]
+
+ def test_none_for_linear_variant_raises(self):
+ """Verify linear variants require linear_attention_freq to be explicitly set."""
+ cfg = _make_config(
+ num_layers=4,
+ linear_attention_freq=None,
+ experimental_attention_variant="gated_delta_net",
+ )
+ with pytest.raises(ValueError, match="linear_attention_freq is None"):
+ self._fn(cfg)
+
+ def test_invalid_type_raises(self):
+ """Verify unsupported linear_attention_freq types raise ValueError."""
+ cfg = _make_config(num_layers=4, linear_attention_freq=3.14)
+ with pytest.raises(ValueError, match="Invalid linear_attention_freq"):
+ self._fn(cfg)
+
+
+# ===================================================================
+# Tests for get_gated_delta_net_module_spec
+# ===================================================================
+
+
+class TestGetGatedDeltaNetModuleSpec:
+ def test_returns_correct_module_spec(self):
+ """Verify the top-level module spec targets GatedDeltaNet with expected metainfo."""
+ from megatron.core.models.gpt.experimental_attention_variant_module_specs import (
+ get_gated_delta_net_module_spec,
+ )
+ from megatron.core.ssm.gated_delta_net import GatedDeltaNet
+
+ backend = _make_backend()
+ cfg = _make_config(normalization="RMSNorm")
+ spec = get_gated_delta_net_module_spec(cfg, backend=backend)
+
+ assert isinstance(spec, ModuleSpec)
+ assert spec.module is GatedDeltaNet
+ assert spec.metainfo == {"fuse_input_layernorm": True}
+
+ def test_submodules_use_backend_modules(self):
+ """Verify backend-provided projection/norm modules are wired into submodules."""
+ from megatron.core.models.gpt.experimental_attention_variant_module_specs import (
+ get_gated_delta_net_module_spec,
+ )
+
+ backend = _make_backend()
+ cfg = _make_config(normalization="RMSNorm")
+ spec = get_gated_delta_net_module_spec(cfg, backend=backend)
+
+ subs = spec.submodules
+ assert subs.in_proj == _FakeLayerNormColumnParallelLinear
+ assert subs.out_proj == _FakeRowParallelLinear
+ backend.layer_norm.assert_any_call(rms_norm=True, for_qk=False)
+
+ def test_layer_norm_normalization(self):
+ """Verify LayerNorm mode passes rms_norm=False to backend.layer_norm."""
+ from megatron.core.models.gpt.experimental_attention_variant_module_specs import (
+ get_gated_delta_net_module_spec,
+ )
+
+ backend = _make_backend()
+ cfg = _make_config(normalization="LayerNorm")
+ get_gated_delta_net_module_spec(cfg, backend=backend)
+ backend.layer_norm.assert_any_call(rms_norm=False, for_qk=False)
+
+ def test_backend_auto_resolved_when_none(self):
+ """Verify backend is auto-resolved when caller does not pass one."""
+ from megatron.core.models.gpt.experimental_attention_variant_module_specs import (
+ get_gated_delta_net_module_spec,
+ )
+
+ cfg = _make_config(normalization="RMSNorm")
+ with patch(
+ "megatron.core.models.gpt.experimental_attention_variant_module_specs"
+ "._get_backend_spec_provider",
+ return_value=_make_backend(),
+ ):
+ spec = get_gated_delta_net_module_spec(cfg, backend=None)
+ assert isinstance(spec, ModuleSpec)
+
+
+# ===================================================================
+# Tests for get_dsa_module_spec_for_backend
+# ===================================================================
+
+
+class TestGetDsaModuleSpec:
+ def _call(self, cfg=None, backend=None):
+ from megatron.core.models.gpt.experimental_attention_variant_module_specs import (
+ get_dsa_module_spec_for_backend,
+ )
+
+ if cfg is None:
+ cfg = _make_config(multi_latent_attention=True, qk_l2_norm=False, qk_layernorm=True)
+ if backend is None:
+ backend = _make_backend()
+ return get_dsa_module_spec_for_backend(cfg, backend=backend)
+
+ def test_requires_multi_latent_attention(self):
+ """Verify DSA path rejects configs without MLA enabled."""
+ from megatron.core.models.gpt.experimental_attention_variant_module_specs import (
+ get_dsa_module_spec_for_backend,
+ )
+
+ cfg = _make_config(multi_latent_attention=False, qk_l2_norm=False)
+ with pytest.raises(AssertionError, match="only MLA supports"):
+ get_dsa_module_spec_for_backend(cfg, backend=_make_backend())
+
+ def test_rejects_qk_l2_norm(self):
+ """Verify unsupported qk_l2_norm setting is rejected for DSA+MLA."""
+ from megatron.core.models.gpt.experimental_attention_variant_module_specs import (
+ get_dsa_module_spec_for_backend,
+ )
+
+ cfg = _make_config(multi_latent_attention=True, qk_l2_norm=True)
+ with pytest.raises(AssertionError, match="qk_l2_norm is not supported"):
+ get_dsa_module_spec_for_backend(cfg, backend=_make_backend())
+
+ def test_returns_mla_self_attention_spec(self):
+ """Verify the returned attention module is MLA self-attention with causal mask."""
+ from megatron.core.transformer.multi_latent_attention import MLASelfAttention
+
+ spec = self._call()
+ assert spec.module is MLASelfAttention
+ assert spec.params == {"attn_mask_type": AttnMaskType.causal}
+ assert spec.metainfo == {"fuse_input_layernorm": False}
+
+ def test_core_attention_is_dsa(self):
+ """Verify MLA core_attention is wrapped with DSAttention."""
+ from megatron.core.transformer.experimental_attention_variant.dsa import DSAttention
+
+ spec = self._call()
+ core = spec.submodules.core_attention
+ assert core.module is DSAttention
+
+ def test_dsa_indexer_structure(self):
+ """Verify DSA indexer wiring uses expected backend linear/norm modules."""
+ from megatron.core.transformer.experimental_attention_variant.dsa import DSAIndexer
+
+ spec = self._call()
+ indexer = spec.submodules.core_attention.submodules.indexer
+ assert indexer.module is DSAIndexer
+ subs = indexer.submodules
+ assert subs.linear_wq_b == _FakeLinear
+ assert subs.linear_wk == _FakeLinear
+ assert subs.k_norm == _FakeQKNorm
+ assert subs.linear_weights_proj == _FakeLinear
+
+ @pytest.mark.parametrize("normalization", ["RMSNorm", "LayerNorm"])
+ def test_qk_layernorm_enabled(self, normalization):
+ """Verify q/kv layernorm uses backend.layer_norm(rms_norm=..., for_qk=True)."""
+ backend = _make_backend()
+ cfg = _make_config(
+ multi_latent_attention=True,
+ qk_l2_norm=False,
+ qk_layernorm=True,
+ normalization=normalization,
+ )
+ spec = self._call(cfg=cfg, backend=backend)
+ expected_rms = normalization == "RMSNorm"
+ assert spec.submodules.q_layernorm == _FakeQKNorm
+ assert spec.submodules.kv_layernorm == _FakeQKNorm
+ # Both point to the same qk_norm object
+ assert spec.submodules.q_layernorm is spec.submodules.kv_layernorm
+ backend.layer_norm.assert_any_call(rms_norm=expected_rms, for_qk=True)
+
+ def test_qk_layernorm_disabled(self):
+ """Verify q/kv layernorm becomes IdentityOp, skipping backend.layer_norm for qk."""
+ backend = _make_backend()
+ cfg = _make_config(multi_latent_attention=True, qk_l2_norm=False, qk_layernorm=False)
+ spec = self._call(cfg=cfg, backend=backend)
+ assert spec.submodules.q_layernorm is IdentityOp
+ assert spec.submodules.kv_layernorm is IdentityOp
+ # backend.layer_norm is still called for the indexer k_norm (for_qk=True at line 94),
+ # but NOT for the outer qk_norm (line 105-107 takes the else branch).
+ # Exactly one for_qk=True call should exist (from the indexer, not from qk_norm).
+ qk_calls = [c for c in backend.layer_norm.call_args_list if c.kwargs.get("for_qk")]
+ assert (
+ len(qk_calls) == 1
+ ), f"Expected 1 for_qk=True call (indexer only), got {len(qk_calls)}"
+
+ def test_linear_projections(self):
+ """Verify Q/KV projection slots and backend.column_parallel_linear call count."""
+ backend = _make_backend()
+ cfg = _make_config(multi_latent_attention=True, qk_l2_norm=False, qk_layernorm=True)
+ spec = self._call(cfg=cfg, backend=backend)
+ subs = spec.submodules
+ assert subs.linear_q_proj == _FakeColumnParallelLinear
+ assert subs.linear_q_down_proj == _FakeLinear
+ assert subs.linear_q_up_proj == _FakeColumnParallelLinear
+ assert subs.linear_kv_down_proj == _FakeLinear
+ assert subs.linear_kv_up_proj == _FakeColumnParallelLinear
+ assert subs.linear_proj == _FakeRowParallelLinear
+ # column_parallel_linear() is called exactly 3 times (q_proj, q_up_proj, kv_up_proj)
+ assert backend.column_parallel_linear.call_count == 3
+ assert backend.row_parallel_linear.call_count == 1
+
+
+# ===================================================================
+# Tests for get_experimental_attention_variant_module_spec
+# ===================================================================
+
+
+class TestGetExperimentalAttentionVariantModuleSpec:
+ MODULE = "megatron.core.models.gpt.experimental_attention_variant_module_specs"
+
+ @pytest.mark.parametrize(
+ "variant, target_fn",
+ [
+ ("gated_delta_net", "get_gated_delta_net_module_spec"),
+ ("dsa", "get_dsa_module_spec_for_backend"),
+ ],
+ )
+ def test_dispatches_to_variant_handler(self, variant, target_fn):
+ """Verify dispatcher routes each variant name to its corresponding builder function."""
+ backend = _make_backend()
+ cfg = _make_config(experimental_attention_variant=variant, normalization="RMSNorm")
+ with patch(f"{self.MODULE}.{target_fn}") as mock_fn:
+ mock_fn.return_value = ModuleSpec(module=MagicMock)
+ from megatron.core.models.gpt.experimental_attention_variant_module_specs import (
+ get_experimental_attention_variant_module_spec,
+ )
+
+ result = get_experimental_attention_variant_module_spec(cfg, backend=backend)
+ mock_fn.assert_called_once_with(config=cfg, backend=backend)
+ assert result is mock_fn.return_value
+
+ def test_invalid_variant_raises(self):
+ """Verify unknown variant names raise a clear ValueError."""
+ cfg = _make_config(experimental_attention_variant="unknown")
+ with pytest.raises(ValueError, match="Invalid experimental attention variant"):
+ from megatron.core.models.gpt.experimental_attention_variant_module_specs import (
+ get_experimental_attention_variant_module_spec,
+ )
+
+ get_experimental_attention_variant_module_spec(cfg, backend=_make_backend())
+
+
+# ===================================================================
+# Tests for get_transformer_layer_with_experimental_attention_variant_spec
+# ===================================================================
+
+
+class TestGetTransformerLayerWithExperimentalAttentionVariantSpec:
+ MODULE = "megatron.core.models.gpt.experimental_attention_variant_module_specs"
+
+ def _make_attention_spec(self, fuse_input_layernorm=True):
+ """Construct a mock attention spec with configurable fuse metadata."""
+ return ModuleSpec(module=MagicMock, metainfo={"fuse_input_layernorm": fuse_input_layernorm})
+
+ def _make_mlp_spec(self, fuse_pre_mlp_layernorm=True):
+ """Construct a mock MLP spec with configurable fuse metadata."""
+ return ModuleSpec(
+ module=MagicMock, metainfo={"fuse_pre_mlp_layernorm": fuse_pre_mlp_layernorm}
+ )
+
+ def test_all_experimental_no_moe(self):
+ """Verify all layers use experimental attention and dense MLP when no MoE is configured."""
+ from megatron.core.models.gpt.experimental_attention_variant_module_specs import (
+ get_transformer_layer_with_experimental_attention_variant_spec,
+ )
+
+ cfg = _make_config(
+ num_layers=4,
+ experimental_attention_variant="dsa",
+ num_moe_experts=None,
+ normalization="RMSNorm",
+ )
+ backend = _make_backend()
+ attn_spec = self._make_attention_spec(fuse_input_layernorm=False)
+ mlp_spec = self._make_mlp_spec(fuse_pre_mlp_layernorm=True)
+
+ with (
+ patch(
+ f"{self.MODULE}.get_experimental_attention_variant_module_spec",
+ return_value=attn_spec,
+ ),
+ patch(f"{self.MODULE}._get_dense_mlp_module_spec", return_value=mlp_spec),
+ ):
+ specs = get_transformer_layer_with_experimental_attention_variant_spec(
+ cfg, backend=backend
+ )
+
+ assert len(specs) == 4
+ for s in specs:
+ # Each layer should share the same selected module specs in this setup.
+ assert s.module is TransformerLayer
+ assert s.submodules.self_attention is attn_spec
+ assert s.submodules.mlp is mlp_spec
+
+ def test_hybrid_attention_pattern(self):
+ """Verify attention alternates between experimental and standard specs per pattern."""
+ from megatron.core.models.gpt.experimental_attention_variant_module_specs import (
+ get_transformer_layer_with_experimental_attention_variant_spec,
+ )
+
+ cfg = _make_config(
+ num_layers=4,
+ experimental_attention_variant="gated_delta_net",
+ linear_attention_freq=2,
+ num_moe_experts=None,
+ normalization="RMSNorm",
+ )
+ backend = _make_backend()
+ exp_attn_spec = self._make_attention_spec(fuse_input_layernorm=True)
+ std_attn_spec = self._make_attention_spec(fuse_input_layernorm=False)
+ mlp_spec = self._make_mlp_spec(fuse_pre_mlp_layernorm=True)
+
+ with (
+ patch(
+ f"{self.MODULE}.get_experimental_attention_variant_module_spec",
+ return_value=exp_attn_spec,
+ ),
+ patch(f"{self.MODULE}._get_self_attention_module_spec", return_value=std_attn_spec),
+ patch(f"{self.MODULE}._get_dense_mlp_module_spec", return_value=mlp_spec),
+ ):
+ specs = get_transformer_layer_with_experimental_attention_variant_spec(
+ cfg, backend=backend
+ )
+
+ assert len(specs) == 4
+ # Pattern for linear_attention_freq=2: [1, 0, 1, 0]
+ assert specs[0].submodules.self_attention is exp_attn_spec
+ assert specs[1].submodules.self_attention is std_attn_spec
+ assert specs[2].submodules.self_attention is exp_attn_spec
+ assert specs[3].submodules.self_attention is std_attn_spec
+
+ def test_hybrid_moe_pattern_with_mhc(self):
+ """Verify MLP alternates between MoE and dense specs per moe_layer_freq pattern."""
+ from megatron.core.models.gpt.experimental_attention_variant_module_specs import (
+ get_transformer_layer_with_experimental_attention_variant_spec,
+ )
+
+ cfg = _make_config(
+ num_layers=4,
+ experimental_attention_variant="dsa",
+ num_moe_experts=8,
+ moe_layer_freq=2,
+ normalization="RMSNorm",
+ enable_hyper_connections=True,
+ )
+ backend = _make_backend()
+ attn_spec = self._make_attention_spec(fuse_input_layernorm=False)
+ moe_spec = self._make_mlp_spec(fuse_pre_mlp_layernorm=False)
+ dense_spec = self._make_mlp_spec(fuse_pre_mlp_layernorm=True)
+
+ with (
+ patch(
+ f"{self.MODULE}.get_experimental_attention_variant_module_spec",
+ return_value=attn_spec,
+ ),
+ patch(f"{self.MODULE}._get_moe_module_spec", return_value=moe_spec),
+ patch(f"{self.MODULE}._get_dense_mlp_module_spec", return_value=dense_spec),
+ ):
+ specs = get_transformer_layer_with_experimental_attention_variant_spec(
+ cfg, backend=backend
+ )
+
+ # moe_layer_freq=2 -> [1, 0, 1, 0]
+ assert specs[0].submodules.mlp is moe_spec
+ assert specs[1].submodules.mlp is dense_spec
+ assert specs[2].submodules.mlp is moe_spec
+ assert specs[3].submodules.mlp is dense_spec
+ for s in specs:
+ assert s.module is HyperConnectionTransformerLayer
+
+
+# ===================================================================
+# Tests for get_transformer_block_with_experimental_attention_variant_spec
+# ===================================================================
+
+
+class TestGetTransformerBlockWithExperimentalAttentionVariantSpec:
+ MODULE = "megatron.core.models.gpt.experimental_attention_variant_module_specs"
+
+ @pytest.mark.parametrize(
+ "num_layers,pp_size,vp_stage,pp_rank,use_layout,offset,num_layers_to_build,layout_ids,expected_ids",
+ [
+ # no pipeline split
+ (4, 1, None, None, False, 0, 4, None, [0, 1, 2, 3]),
+ # pp split (rank 1 gets [4,5,6,7])
+ (8, 2, None, 1, False, 4, 4, None, [4, 5, 6, 7]),
+ # vpp + pp split (example stage)
+ (8, 2, 1, 0, False, 2, 2, None, [2, 3]),
+ # explicit pipeline layout wins over offset/num_layers
+ (8, 2, 0, 0, True, None, None, [0, 3, 5], [0, 3, 5]),
+ ],
+ )
+ def test_get_transformer_block_with_experimental_attention_variant_spec(
+ self,
+ num_layers,
+ pp_size,
+ vp_stage,
+ pp_rank,
+ use_layout,
+ offset,
+ num_layers_to_build,
+ layout_ids,
+ expected_ids,
+ ):
+ """Verify transformer block layer slicing and vp/pp argument forwarding."""
+ from megatron.core.models.gpt.experimental_attention_variant_module_specs import (
+ get_transformer_block_with_experimental_attention_variant_spec,
+ )
+
+ mock_layout = MagicMock() if use_layout else None
+ if mock_layout is not None:
+ # When layout is provided, it should fully control local layer selection.
+ mock_layout.get_layer_id_list.return_value = layout_ids
+
+ cfg = _make_config(
+ num_layers=num_layers,
+ pipeline_model_parallel_size=pp_size,
+ pipeline_model_parallel_layout=mock_layout,
+ normalization="RMSNorm",
+ )
+ backend = _make_backend()
+ fake_layer_specs = [
+ ModuleSpec(module=TransformerLayer, submodules=MagicMock()) for _ in range(num_layers)
+ ]
+
+ with (
+ patch(f"{self.MODULE}._get_backend_spec_provider", return_value=backend),
+ patch(
+ f"{self.MODULE}.get_transformer_layer_with_experimental_attention_variant_spec",
+ return_value=fake_layer_specs,
+ ),
+ ):
+ if use_layout:
+ result = get_transformer_block_with_experimental_attention_variant_spec(
+ cfg, vp_stage=vp_stage, pp_rank=pp_rank
+ )
+ mock_layout.get_layer_id_list.assert_called_once_with(
+ layer_type=LayerType.decoder, vp_stage=vp_stage, pp_rank=pp_rank
+ )
+ else:
+ # Without explicit layout, slicing comes from offset + num_layers_to_build.
+ with (
+ patch(
+ f"{self.MODULE}.get_transformer_layer_offset", return_value=offset
+ ) as mock_offset,
+ patch(
+ f"{self.MODULE}.get_num_layers_to_build", return_value=num_layers_to_build
+ ) as mock_num_layers,
+ ):
+ result = get_transformer_block_with_experimental_attention_variant_spec(
+ cfg, vp_stage=vp_stage, pp_rank=pp_rank
+ )
+ mock_offset.assert_called_once_with(cfg, vp_stage=vp_stage, pp_rank=pp_rank)
+ mock_num_layers.assert_called_once_with(cfg, vp_stage=vp_stage, pp_rank=pp_rank)
+
+ assert isinstance(result, TransformerBlockSubmodules)
+ assert result.layer_specs == [fake_layer_specs[i] for i in expected_ids]
diff --git a/tests/unit_tests/models/test_gpt_layer_specs.py b/tests/unit_tests/models/test_gpt_layer_specs.py
new file mode 100644
index 00000000000..bfa86fd0241
--- /dev/null
+++ b/tests/unit_tests/models/test_gpt_layer_specs.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+import pytest
+
+from megatron.core.models.gpt.gpt_layer_specs import (
+ get_gpt_layer_local_spec,
+ get_gpt_layer_with_transformer_engine_spec,
+)
+from megatron.core.transformer.hyper_connection import HyperConnectionModule
+from megatron.core.transformer.identity_op import IdentityOp
+from megatron.core.transformer.transformer_layer import (
+ HyperConnectionTransformerLayer,
+ TransformerLayer,
+)
+
+_TE = get_gpt_layer_with_transformer_engine_spec
+_LOCAL = get_gpt_layer_local_spec
+_HC = HyperConnectionTransformerLayer
+_HC_MOD = HyperConnectionModule
+_TL = TransformerLayer
+_ID = IdentityOp
+
+
+class TestGptLayerSpecsHyperConnection:
+ """Test that enable_hyper_connection controls module types in layer specs."""
+
+ @pytest.mark.parametrize(
+ "factory,kwargs,expected_module,expected_hc",
+ [
+ (_TE, {}, _TL, _ID),
+ (_TE, {"enable_hyper_connection": True}, _HC, _HC_MOD),
+ (_TE, {"enable_hyper_connection": False}, _TL, _ID),
+ (_TE, {"multi_latent_attention": True, "enable_hyper_connection": False}, _TL, _ID),
+ (_TE, {"multi_latent_attention": True, "enable_hyper_connection": True}, _HC, _HC_MOD),
+ (_LOCAL, {}, _TL, _ID),
+ (_LOCAL, {"enable_hyper_connection": True}, _HC, _HC_MOD),
+ (_LOCAL, {"enable_hyper_connection": False}, _TL, _ID),
+ (_LOCAL, {"multi_latent_attention": True, "enable_hyper_connection": False}, _TL, _ID),
+ (
+ _LOCAL,
+ {"multi_latent_attention": True, "enable_hyper_connection": True},
+ _HC,
+ _HC_MOD,
+ ),
+ (_LOCAL, {"normalization": "RMSNorm", "enable_hyper_connection": False}, _TL, _ID),
+ (_LOCAL, {"normalization": "RMSNorm", "enable_hyper_connection": True}, _HC, _HC_MOD),
+ ],
+ ids=[
+ "te_default",
+ "te_enable",
+ "te_disable",
+ "te_mla_disable",
+ "te_mla_enable",
+ "local_default",
+ "local_enable",
+ "local_disable",
+ "local_mla_disable",
+ "local_mla_enable",
+ "local_rmsnorm_disable",
+ "local_rmsnorm_enable",
+ ],
+ )
+ def test_hyper_connection_spec(self, factory, kwargs, expected_module, expected_hc):
+ spec = factory(**kwargs)
+ assert spec.module is expected_module
+ assert spec.submodules.self_attention_hyper_connection is expected_hc
+ assert spec.submodules.mlp_hyper_connection is expected_hc
diff --git a/tests/unit_tests/models/test_mamba_moe_model.py b/tests/unit_tests/models/test_mamba_moe_model.py
index 534ed103efa..a32776a9424 100644
--- a/tests/unit_tests/models/test_mamba_moe_model.py
+++ b/tests/unit_tests/models/test_mamba_moe_model.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
import hashlib
import inspect
@@ -16,6 +16,7 @@
from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed
from megatron.core.transformer import TransformerConfig
from megatron.core.transformer.enums import AttnBackend
+from megatron.core.transformer.moe.moe_logging import destroy_moe_metrics_tracker
from megatron.training.arguments import core_transformer_config_from_args, parse_args, validate_args
from megatron.training.global_vars import (
destroy_global_vars,
@@ -75,6 +76,7 @@
"deallocate_pipeline_outputs": True,
"defer_embedding_wgrad_compute": False,
"delay_wgrad_compute": False,
+ "overlap_dispatch_backward_with_experts_wgrad": False,
"deterministic_mode": False,
"disable_bf16_reduced_precision_matmul": False,
"disable_parameter_transpose_cache": False,
@@ -88,6 +90,7 @@
"embedding_init_method_std": 0.014,
"enable_autocast": False,
"enable_cuda_graph": False,
+ "enable_hyper_connections": False,
"ep_overlap_early_attn_memory_release": False,
"experimental_attention_variant": None,
"expert_model_parallel_size": 4,
@@ -150,6 +153,9 @@
"mamba_state_dim": 128,
"masked_softmax_fusion": True,
"memory_efficient_layer_norm": False,
+ "mhc_init_gating_factor": 0.01,
+ "mhc_recompute_layer_num": None,
+ "mhc_sinkhorn_iterations": 20,
"microbatch_group_size_per_vp_stage": 1,
"mlp_chunks_for_prefill": 1,
"moe_apply_probs_on_input": False,
@@ -218,6 +224,7 @@
"num_microbatches_with_partial_activation_checkpoints": None,
"num_moe_experts": 128,
"num_query_groups": 2,
+ "num_residual_streams": 4,
"output_layer_init_method": {},
"overlap_moe_expert_parallel_comm": False,
"overlap_p2p_comm": False,
@@ -264,6 +271,7 @@
"tp_only_amax_red": False,
"transformer_impl": "transformer_engine",
"use_cpu_initialization": None,
+ "use_fused_mhc": False,
"use_fused_weighted_squared_relu": False,
"use_inference_optimized_layers": False,
"use_kitchen": False,
@@ -281,12 +289,21 @@
"fine_grained_activation_offloading": False,
"min_offloaded_tensor_size": 1024 * 1024,
"offload_modules": [],
+ "delay_offload_until_cuda_graph": False,
+ "delta_offload_bytes_across_pp_ranks": 0,
+ "activation_offload_fraction": 1.0,
+ "dynamic_context_parallel": False,
"hybrid_context_parallel": False,
"max_seqlen_per_dp_cp_rank": None,
+ "fallback_to_eager_attn": False,
"inference_disable_triton_nvls_kernels": False,
- "moe_router_force_biased": None,
"inference_grouped_gemm_backend": "auto",
"inference_moe_disable_fused_quant_kernels": False,
+ "linear_attention_type": None,
+ "moe_mlp_glu_interleave_size": None,
+ "moe_router_force_biased": None,
+ "sequence_packing_scheduler": None,
+ "use_transformer_engine_op_fuser": False,
}
# Fields to ignore entirely (ephemeral, environment-specific, very large).
SKIP_FIELDS = set()
@@ -483,6 +500,7 @@ def create_test_args(self):
def setup_method(self, method):
os.environ['CUDA_DEVICE_MAX_CONNECTIONS'] = '1'
+ destroy_moe_metrics_tracker()
args = self.create_test_args()
set_args(args)
diff --git a/tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py b/tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py
index 2171e8db810..964e83464da 100644
--- a/tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py
+++ b/tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py
@@ -319,7 +319,6 @@ def test_gpt_fine_grained_activation_offloading_correctness_and_memory(
("alltoall", True, ["mlp_norm"]),
("alltoall", False, ["expert_fc1"]),
("alltoall", False, ["moe_act"]),
- ("alltoall", False, ["mlp_norm", "expert_fc1", "moe_act"]),
(
"alltoall",
True,
@@ -569,3 +568,338 @@ def _run_schedule_1f1b_two_microbatches(
)
finally:
Utils.destroy_model_parallel()
+
+
+# =============================================================================
+# CUDA Graph + Fine-grained Activation Offloading Tests
+# =============================================================================
+
+
+def _build_gpt_model_with_cuda_graph(
+ *,
+ seed: int,
+ num_layers: int,
+ hidden_size: int,
+ num_attention_heads: int,
+ vocab_size: int,
+ seq_length: int,
+ num_experts: Optional[int],
+ fine_grained_activation_offloading: bool,
+ offload_modules: Optional[List[str]],
+ min_offloaded_tensor_size: int,
+ is_mla: bool,
+ cuda_graph_impl: str,
+ cuda_graph_scope: Optional[List[str]],
+ cuda_graph_warmup_steps: int,
+ delay_offload_until_cuda_graph: bool = False,
+ activation_offload_fraction: float = 1.0,
+) -> GPTModel:
+ """Build a GPTModel with CUDA Graph support and fine-grained activation offloading."""
+ model_parallel_cuda_manual_seed(seed)
+ torch.manual_seed(seed)
+ ConfigClass = MLATransformerConfig if is_mla else TransformerConfig
+ transformer_config = ConfigClass(
+ num_layers=num_layers,
+ hidden_size=hidden_size,
+ num_attention_heads=num_attention_heads,
+ use_cpu_initialization=True,
+ attention_backend=AttnBackend.unfused,
+ bf16=True,
+ # Recompute
+ recompute_modules=["layernorm", "moe_act"] if num_experts is not None else ["layernorm"],
+ recompute_granularity="selective",
+ # MoE
+ num_moe_experts=num_experts,
+ moe_grouped_gemm=(num_experts is not None),
+ # Fine-grained activation offloading
+ fine_grained_activation_offloading=fine_grained_activation_offloading,
+ offload_modules=offload_modules,
+ min_offloaded_tensor_size=min_offloaded_tensor_size,
+ delay_offload_until_cuda_graph=delay_offload_until_cuda_graph,
+ activation_offload_fraction=activation_offload_fraction,
+ # CUDA Graph settings
+ cuda_graph_impl=cuda_graph_impl,
+ cuda_graph_scope=cuda_graph_scope,
+ cuda_graph_warmup_steps=cuda_graph_warmup_steps,
+ use_te_rng_tracker=True,
+ )
+ gpt_model = GPTModel(
+ config=transformer_config,
+ transformer_layer_spec=get_gpt_layer_with_transformer_engine_spec(
+ num_experts=num_experts,
+ moe_grouped_gemm=num_experts is not None,
+ moe_use_legacy_grouped_gemm=False,
+ multi_latent_attention=is_mla,
+ ),
+ vocab_size=vocab_size,
+ max_sequence_length=seq_length,
+ ).bfloat16()
+ return gpt_model
+
+
+def _run_iters_with_cuda_graph(
+ model: GPTModel,
+ *,
+ input_ids: torch.Tensor,
+ position_ids: torch.Tensor,
+ attention_mask: torch.Tensor,
+ num_warmup_iters: int,
+ num_measure_iters: int,
+ enable_offload_reset: bool,
+) -> Tuple[torch.Tensor, Dict[str, torch.Tensor], int]:
+ """
+ Run multiple forward+backward iterations with CUDA graph capture.
+
+ Returns:
+ - logits from last iteration (CPU float32)
+ - selected grads from last iteration (CPU float32)
+ - peak_memory_allocated (bytes) during measurement iterations
+ """
+ from megatron.core.transformer.cuda_graphs import _CudagraphGlobalRecord, delete_cuda_graphs
+
+ if enable_offload_reset:
+ off_interface.reset()
+
+ # Warmup iterations (before CUDA graph capture)
+ for _ in range(num_warmup_iters):
+ if enable_offload_reset:
+ off_interface.reset()
+ logits = model(
+ input_ids=input_ids, position_ids=position_ids, attention_mask=attention_mask
+ )
+ loss = logits.float().sum()
+ loss.backward()
+ # Zero grads for next iteration
+ for p in model.parameters():
+ if p.grad is not None:
+ p.grad.zero_()
+
+ # Trigger post-warmup offload decisions
+ if enable_offload_reset:
+ off_interface.reset()
+
+ # Create CUDA graphs after warmup
+ _CudagraphGlobalRecord.create_cudagraphs()
+
+ # Measurement iterations (with CUDA graph replay)
+ torch.cuda.reset_peak_memory_stats()
+ for i in range(num_measure_iters):
+ if enable_offload_reset:
+ off_interface.reset()
+ logits = model(
+ input_ids=input_ids, position_ids=position_ids, attention_mask=attention_mask
+ )
+ loss = logits.float().sum()
+ loss.backward()
+ if i < num_measure_iters - 1:
+ for p in model.parameters():
+ if p.grad is not None:
+ p.grad.zero_()
+
+ torch.cuda.synchronize()
+ peak_bytes = int(torch.cuda.max_memory_allocated())
+
+ # Capture grads from last iteration
+ grads: Dict[str, torch.Tensor] = {}
+ for name, p in model.named_parameters():
+ grads[name] = p.grad.detach().float().cpu() if p.grad is not None else None
+
+ # Cleanup CUDA graphs
+ delete_cuda_graphs()
+
+ return logits.detach().float().cpu(), grads, peak_bytes
+
+
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA is required for offloading tests.")
+@pytest.mark.skipif(
+    not is_te_min_version("2.14.0"), reason="CUDA Graph with TE RNG tracker requires TE >= 2.14.0"
+)
+@pytest.mark.parametrize(
+ "is_mla, offload_modules, cuda_graph_scope, activation_offload_fraction, delay_offload",
+ [
+ # MoE model with attention CUDA graph + attn offloading
+ (False, ["core_attn", "attn_proj"], ["attn", "moe_router"], 1.0, True),
+ (False, ["expert_fc1", "moe_act"], ["attn", "moe_router", "moe_preprocess"], 1.0, True),
+ (False, ["core_attn", "attn_proj", "expert_fc1"], ["attn", "moe_router"], 1.0, True),
+ (
+ False,
+ ["core_attn", "attn_proj", "expert_fc1", "moe_act"],
+ ["attn", "moe_router"],
+ 1.0,
+ True,
+ ),
+ (
+ False,
+ ["core_attn", "expert_fc1", "moe_act"],
+ ["attn", "moe_router", "moe_preprocess"],
+ 1.0,
+ True,
+ ),
+ (
+ True,
+ ["core_attn", "attn_proj", "expert_fc1", "moe_act"],
+ ["attn", "moe_router", "moe_preprocess"],
+ 1.0,
+ True,
+ ),
+ # Test activation_offload_fraction parameter
+ (False, ["core_attn", "attn_proj", "expert_fc1"], ["attn", "moe_router"], 0.0, True),
+ (False, ["core_attn", "attn_proj", "expert_fc1"], ["attn", "moe_router"], 0.5, True),
+ # Test delay_offload_until_cuda_graph parameter
+ (False, ["core_attn", "attn_proj", "expert_fc1"], ["attn", "moe_router"], 1.0, False),
+ ],
+)
+def test_fine_grained_activation_offloading_with_cuda_graph(
+ is_mla: bool,
+ offload_modules: List[str],
+ cuda_graph_scope: List[str],
+ activation_offload_fraction: float,
+ delay_offload: bool,
+):
+ """
+ Test fine-grained activation offloading combined with CUDA graph capture.
+
+ Verifies:
+ - Forward output correctness with CUDA graph + offloading
+ - Backward gradient correctness
+ - Memory savings from offloading are preserved with CUDA graphs
+ - Different activation_offload_fraction values work correctly
+ - Both delay_offload_until_cuda_graph=True/False produce correct results
+ """
+ from megatron.core.tensor_parallel.random import initialize_rng_tracker
+
+ os.environ.pop("NVTE_FUSED_ATTN", None)
+ os.environ.pop("NVTE_FLASH_ATTN", None)
+ os.environ.pop("NVTE_UNFUSED_ATTN", None)
+
+ initialize_rng_tracker(use_te_rng_tracker=True, force_reset=True)
+ Utils.initialize_model_parallel(tensor_model_parallel_size=1, pipeline_model_parallel_size=1)
+
+ seed = 123
+ num_experts = 4 # Always MoE model
+ num_layers = 4 # Smaller for faster test with CUDA graphs
+ hidden_size = 1024
+ num_attention_heads = 8
+ vocab_size = 512
+ seq_length = 512
+ micro_batch_size = 2
+ device = torch.device("cuda")
+ cuda_graph_warmup_steps = 3
+
+ input_ids, position_ids, attention_mask = _make_gpt_inputs(
+ seq_length=seq_length, micro_batch_size=micro_batch_size, device=device
+ )
+
+ off_interface.reset_instance()
+
+ try:
+ # 1) Baseline: CUDA graph enabled, offloading disabled
+ _reset_cuda_memory()
+ base_model = _build_gpt_model_with_cuda_graph(
+ seed=seed,
+ num_layers=num_layers,
+ hidden_size=hidden_size,
+ num_attention_heads=num_attention_heads,
+ vocab_size=vocab_size,
+ seq_length=seq_length,
+ num_experts=num_experts,
+ fine_grained_activation_offloading=False,
+ offload_modules=None,
+ min_offloaded_tensor_size=1024 * 1024,
+ is_mla=is_mla,
+ cuda_graph_impl="transformer_engine",
+ cuda_graph_scope=cuda_graph_scope,
+ cuda_graph_warmup_steps=cuda_graph_warmup_steps,
+ ).cuda()
+ base_model.train()
+
+ base_logits, base_grads, base_peak = _run_iters_with_cuda_graph(
+ base_model,
+ input_ids=input_ids,
+ position_ids=position_ids,
+ attention_mask=attention_mask,
+ num_warmup_iters=cuda_graph_warmup_steps,
+ num_measure_iters=2,
+ enable_offload_reset=False,
+ )
+ del base_model
+ _reset_cuda_memory()
+
+ # 2) Test: CUDA graph enabled + offloading enabled
+ off_interface.reset_instance()
+
+ off_model = _build_gpt_model_with_cuda_graph(
+ seed=seed,
+ num_layers=num_layers,
+ hidden_size=hidden_size,
+ num_attention_heads=num_attention_heads,
+ vocab_size=vocab_size,
+ seq_length=seq_length,
+ num_experts=num_experts,
+ fine_grained_activation_offloading=True,
+ offload_modules=offload_modules,
+ min_offloaded_tensor_size=1024, # Force offloading for determinism
+ is_mla=is_mla,
+ cuda_graph_impl="transformer_engine",
+ cuda_graph_scope=cuda_graph_scope,
+ cuda_graph_warmup_steps=cuda_graph_warmup_steps,
+ delay_offload_until_cuda_graph=delay_offload,
+ activation_offload_fraction=activation_offload_fraction,
+ ).cuda()
+ off_model.train()
+
+ off_logits, off_grads, off_peak = _run_iters_with_cuda_graph(
+ off_model,
+ input_ids=input_ids,
+ position_ids=position_ids,
+ attention_mask=attention_mask,
+ num_warmup_iters=cuda_graph_warmup_steps,
+ num_measure_iters=2,
+ enable_offload_reset=True,
+ )
+ del off_model
+ _reset_cuda_memory()
+
+ # 3) Correctness checks
+ assert torch.allclose(
+ off_logits, base_logits, rtol=1e-2, atol=1e-2
+ ), f"Logits mismatch: max_diff={torch.max(torch.abs(off_logits - base_logits))}"
+ assert set(off_grads.keys()) == set(base_grads.keys())
+ for name, gb in base_grads.items():
+ go = off_grads[name]
+ if gb is None or go is None:
+ assert gb is None and go is None, f"Grad None mismatch for {name}"
+ continue
+ assert torch.allclose(
+ go, gb, rtol=1e-2, atol=1e-2
+ ), f"Grad mismatch for {name}: max_diff={torch.max(torch.abs(go - gb))}"
+
+ # 4) Memory checks - offloading should still reduce memory with CUDA graphs
+ saved_mib = (base_peak - off_peak) / (1024**2)
+ print(
+ f"CUDA Graph + Offload test (fraction={activation_offload_fraction}, delay={delay_offload}): "
+ f"base_peak={base_peak/(1024**2):.2f}MiB, "
+ f"off_peak={off_peak/(1024**2):.2f}MiB, "
+ f"saved={saved_mib:.2f}MiB"
+ )
+
+ # Basic sanity checks
+ assert not torch.isnan(off_logits).any(), "NaN detected in logits"
+ assert not torch.isinf(off_logits).any(), "Inf detected in logits"
+
+ # Check gradients are valid
+ for name, g in off_grads.items():
+ if g is not None:
+ assert not torch.isnan(g).any(), f"NaN detected in grad for {name}"
+ assert not torch.isinf(g).any(), f"Inf detected in grad for {name}"
+
+ # Note: With CUDA graphs, memory behavior may differ from eager mode.
+ # We check that offloading doesn't significantly increase memory.
+ # In some cases, graph capture overhead may offset offload savings.
+ assert saved_mib >= -DELTA, (
+ f"Offloading with CUDA graph significantly increased memory: "
+ f"saved={saved_mib:.2f}MiB (negative means increase)"
+ )
+
+ finally:
+ Utils.destroy_model_parallel()
diff --git a/tests/unit_tests/pipeline_parallel/test_pp_mhc_compatibility.py b/tests/unit_tests/pipeline_parallel/test_pp_mhc_compatibility.py
new file mode 100644
index 00000000000..eda8ffe7df4
--- /dev/null
+++ b/tests/unit_tests/pipeline_parallel/test_pp_mhc_compatibility.py
@@ -0,0 +1,1123 @@
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+"""
+Unit tests for PP / VPP + mHC (Hyper Connections) compatibility.
+
+Tests cover:
+1. get_tensor_shapes: shape correctness with mHC for all PP stages
+2. get_num_layers_to_build: layer counts with standalone embedding/loss + mHC
+3. TransformerBlock expand/contract: correct placement at PP boundaries
+4. VPP tensor_shape: single shape used across all chunks with mHC
+5. E2E forward pass: PP + mHC + standalone embedding/loss (multi-GPU)
+6. Flexible VPP layout (pipeline_model_parallel_layout) + mHC compatibility
+
+Run with:
+ uv run --no-sync pytest tests/unit_tests/pipeline_parallel/test_pp_mhc_compatibility.py -s -x
+ # Multi-GPU tests (world_size >= 2):
+ torchrun --nproc-per-node=2 -m pytest tests/unit_tests/pipeline_parallel/test_pp_mhc_compatibility.py -s -x
+"""
+
+from types import SimpleNamespace
+from unittest.mock import MagicMock
+
+import pytest
+import torch
+
+from megatron.core import parallel_state
+from megatron.core.pipeline_parallel.schedules import get_tensor_shapes
+from megatron.core.transformer.hyper_connection import HyperConnectionModule
+from megatron.core.transformer.transformer_block import get_num_layers_to_build
+from megatron.core.transformer.transformer_config import TransformerConfig
+from tests.unit_tests.test_utilities import Utils
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_pp_group(rank: int, size: int):
+ """Create a mock PP process group with given rank and size."""
+ pg = MagicMock()
+ pg.rank.return_value = rank
+ pg.size.return_value = size
+ return pg
+
+
+def _make_tp_cp_groups(tp_size: int = 1, cp_size: int = 1):
+ tp = MagicMock()
+ tp.size.return_value = tp_size
+ cp = MagicMock()
+ cp.size.return_value = cp_size
+ return tp, cp
+
+
+def _get_send_recv_shapes(config, pp_size, seq=32, mbs=2):
+ """Get (send_shape, recv_shape) for each PP rank."""
+ tp, cp = _make_tp_cp_groups()
+ results = []
+ for rank in range(pp_size):
+ send = get_tensor_shapes(
+ seq_length=seq,
+ micro_batch_size=mbs,
+ decoder_seq_length=None,
+ config=config,
+ tp_group=tp,
+ cp_group=cp,
+ pp_group=_make_pp_group(rank, pp_size),
+ is_recv=False,
+ )
+ recv = get_tensor_shapes(
+ seq_length=seq,
+ micro_batch_size=mbs,
+ decoder_seq_length=None,
+ config=config,
+ tp_group=tp,
+ cp_group=cp,
+ pp_group=_make_pp_group(rank, pp_size),
+ is_recv=True,
+ )
+ results.append((send, recv))
+ return results
+
+
+def _make_config(
+ hidden_size=64,
+ num_layers=8,
+ pp_size=2,
+ vp_size=None,
+ enable_hyper_connections=False,
+ num_residual_streams=4,
+ account_for_embedding=False,
+ account_for_loss=False,
+ num_layers_first=None,
+ num_layers_last=None,
+ **extra,
+):
+ """Build a TransformerConfig for testing without initializing parallel state."""
+ kwargs = dict(
+ hidden_size=hidden_size,
+ num_layers=num_layers,
+ num_attention_heads=4,
+ pipeline_model_parallel_size=pp_size,
+ virtual_pipeline_model_parallel_size=vp_size,
+ enable_hyper_connections=enable_hyper_connections,
+ num_residual_streams=num_residual_streams,
+ account_for_embedding_in_pipeline_split=account_for_embedding,
+ account_for_loss_in_pipeline_split=account_for_loss,
+ num_layers_in_first_pipeline_stage=num_layers_first,
+ num_layers_in_last_pipeline_stage=num_layers_last,
+ use_cpu_initialization=True,
+ )
+ if pp_size > 1:
+ kwargs.setdefault('pipeline_dtype', torch.bfloat16)
+ kwargs.update(extra)
+ return TransformerConfig(**kwargs)
+
+
+# ===========================================================================
+# 1. get_tensor_shapes — shape correctness with mHC
+# ===========================================================================
+
+
+class TestGetTensorShapesWithMHC:
+ """Verify get_tensor_shapes returns correct hidden dim for mHC-enabled models."""
+
+ SEQ, MBS, H = 32, 2, 64
+ N_STREAMS = 4
+
+ def _shapes(self, config, pp_rank, pp_size, is_recv):
+ tp, cp = _make_tp_cp_groups()
+ pp = _make_pp_group(pp_rank, pp_size)
+ return get_tensor_shapes(
+ seq_length=self.SEQ,
+ micro_batch_size=self.MBS,
+ decoder_seq_length=None,
+ config=config,
+ tp_group=tp,
+ cp_group=cp,
+ pp_group=pp,
+ is_recv=is_recv,
+ )
+
+ # --- Without mHC (baseline) ---
+
+ def test_no_mhc_pp2_all_stages(self):
+ cfg = _make_config(hidden_size=self.H, pp_size=2, enable_hyper_connections=False)
+ for rank in range(2):
+ for is_recv in (True, False):
+ shapes = self._shapes(cfg, rank, 2, is_recv)
+ assert shapes == [(self.SEQ, self.MBS, self.H)]
+
+ # --- With mHC, PP=2 ---
+
+ def test_mhc_pp2_rank0_send_nstream(self):
+ """PP rank 0 sends n*C to rank 1."""
+ cfg = _make_config(
+ hidden_size=self.H,
+ pp_size=2,
+ enable_hyper_connections=True,
+ num_residual_streams=self.N_STREAMS,
+ )
+ shapes = self._shapes(cfg, pp_rank=0, pp_size=2, is_recv=False)
+ assert shapes == [(self.SEQ, self.MBS, self.H * self.N_STREAMS)]
+
+ def test_mhc_pp2_rank0_recv_1stream(self):
+ """PP rank 0 receives nothing from previous (is first stage), so shape = C."""
+ cfg = _make_config(
+ hidden_size=self.H,
+ pp_size=2,
+ enable_hyper_connections=True,
+ num_residual_streams=self.N_STREAMS,
+ )
+ shapes = self._shapes(cfg, pp_rank=0, pp_size=2, is_recv=True)
+ assert shapes == [(self.SEQ, self.MBS, self.H)]
+
+ def test_mhc_pp2_rank1_recv_nstream(self):
+ """PP rank 1 receives n*C from rank 0."""
+ cfg = _make_config(
+ hidden_size=self.H,
+ pp_size=2,
+ enable_hyper_connections=True,
+ num_residual_streams=self.N_STREAMS,
+ )
+ shapes = self._shapes(cfg, pp_rank=1, pp_size=2, is_recv=True)
+ assert shapes == [(self.SEQ, self.MBS, self.H * self.N_STREAMS)]
+
+ def test_mhc_pp2_rank1_send_1stream(self):
+ """PP rank 1 (last stage) sends C (after output_contract)."""
+ cfg = _make_config(
+ hidden_size=self.H,
+ pp_size=2,
+ enable_hyper_connections=True,
+ num_residual_streams=self.N_STREAMS,
+ )
+ shapes = self._shapes(cfg, pp_rank=1, pp_size=2, is_recv=False)
+ assert shapes == [(self.SEQ, self.MBS, self.H)]
+
+ # --- With mHC, PP=4 (intermediate ranks) ---
+
+ def test_mhc_pp4_intermediate_ranks(self):
+ """Intermediate ranks both send and receive n*C."""
+ cfg = _make_config(
+ hidden_size=self.H,
+ pp_size=4,
+ num_layers=8,
+ enable_hyper_connections=True,
+ num_residual_streams=self.N_STREAMS,
+ )
+ for rank in (1, 2):
+ for is_recv in (True, False):
+ shapes = self._shapes(cfg, pp_rank=rank, pp_size=4, is_recv=is_recv)
+ assert shapes == [
+ (self.SEQ, self.MBS, self.H * self.N_STREAMS)
+ ], f"rank={rank}, is_recv={is_recv}"
+
+ # --- With sequence parallel ---
+
+ def test_mhc_with_sequence_parallel(self):
+ """Sequence parallel divides seq_length by TP size."""
+ cfg = _make_config(
+ hidden_size=self.H,
+ pp_size=2,
+ enable_hyper_connections=True,
+ num_residual_streams=self.N_STREAMS,
+ sequence_parallel=True,
+ tensor_model_parallel_size=2,
+ )
+ tp, cp = _make_tp_cp_groups(tp_size=2)
+ pp = _make_pp_group(0, 2)
+ shapes = get_tensor_shapes(
+ seq_length=self.SEQ,
+ micro_batch_size=self.MBS,
+ decoder_seq_length=None,
+ config=cfg,
+ tp_group=tp,
+ cp_group=cp,
+ pp_group=pp,
+ is_recv=False,
+ )
+ assert shapes == [(self.SEQ // 2, self.MBS, self.H * self.N_STREAMS)]
+
+
+# ===========================================================================
+# 2. get_num_layers_to_build — mHC + standalone embedding/loss
+# ===========================================================================
+
+
+class TestGetNumLayersToBuildWithMHC:
+ """
+ Verify layer counts are correct when mHC is combined with standalone
+ embedding / loss stages (account_for_embedding/loss_in_pipeline_split).
+ mHC itself doesn't change layer counts, but we need to ensure the
+ combination doesn't break.
+ """
+
+ def test_pp2_even_split_mhc(self):
+ cfg = _make_config(num_layers=8, pp_size=2, enable_hyper_connections=True)
+ assert get_num_layers_to_build(cfg, pp_rank=0) == 4
+ assert get_num_layers_to_build(cfg, pp_rank=1) == 4
+
+ def test_pp2_standalone_embedding_mhc(self):
+ """With standalone embedding on PP rank 0, rank 0 builds fewer layers."""
+ cfg = _make_config(
+ num_layers=8,
+ pp_size=2,
+ enable_hyper_connections=True,
+ account_for_embedding=True,
+ account_for_loss=True,
+ )
+ # (8 + 1 + 1) / 2 = 5 per rank
+ # rank 0: 5 - 1 (embedding) = 4 transformer layers
+ # rank 1: 5 - 1 (loss) = 4 transformer layers
+ assert get_num_layers_to_build(cfg, pp_rank=0) == 4
+ assert get_num_layers_to_build(cfg, pp_rank=1) == 4
+
+ def test_pp4_standalone_invalid_division_raises(self):
+ """PP=4, standalone embedding+loss, 12 layers β (12+2)/4=3.5 β raises."""
+ with pytest.raises((ValueError, AssertionError)):
+ _make_config(
+ num_layers=12,
+ pp_size=4,
+ enable_hyper_connections=True,
+ account_for_embedding=True,
+ account_for_loss=True,
+ )
+
+ def test_pp4_standalone_both_mhc_valid(self):
+ """Valid configuration: (14+2)/4 = 4 per rank."""
+ cfg = _make_config(
+ num_layers=14,
+ pp_size=4,
+ enable_hyper_connections=True,
+ account_for_embedding=True,
+ account_for_loss=True,
+ )
+ # rank 0: 4 - 1 (embedding) = 3
+ # rank 1, 2: 4
+ # rank 3: 4 - 1 (loss) = 3
+ assert get_num_layers_to_build(cfg, pp_rank=0) == 3
+ assert get_num_layers_to_build(cfg, pp_rank=1) == 4
+ assert get_num_layers_to_build(cfg, pp_rank=2) == 4
+ assert get_num_layers_to_build(cfg, pp_rank=3) == 3
+
+ def test_uneven_pp_with_mhc(self):
+ """Uneven PP: first stage has 2 layers, last has 2, middle gets 2 each."""
+ cfg = _make_config(
+ num_layers=8,
+ pp_size=4,
+ enable_hyper_connections=True,
+ num_layers_first=2,
+ num_layers_last=2,
+ )
+ assert get_num_layers_to_build(cfg, pp_rank=0) == 2
+ assert get_num_layers_to_build(cfg, pp_rank=1) == 2
+ assert get_num_layers_to_build(cfg, pp_rank=2) == 2
+ assert get_num_layers_to_build(cfg, pp_rank=3) == 2
+
+ def test_vpp_with_mhc(self):
+ """VPP=2 with mHC: each VP stage gets half the layers per rank."""
+ cfg = _make_config(num_layers=8, pp_size=2, vp_size=2, enable_hyper_connections=True)
+ for pp_rank in range(2):
+ for vp_stage in range(2):
+ n = get_num_layers_to_build(cfg, vp_stage=vp_stage, pp_rank=pp_rank)
+ assert n == 2, f"pp_rank={pp_rank}, vp_stage={vp_stage}, got {n}"
+
+ def test_vpp_standalone_embedding_loss_invalid_raises(self):
+ """VPP=2, standalone embedding+loss, pp=2, 8 layers β 10/2=5, 5%2!=0 β raises."""
+ with pytest.raises((ValueError, AssertionError)):
+ _make_config(
+ num_layers=8,
+ pp_size=2,
+ vp_size=2,
+ enable_hyper_connections=True,
+ account_for_embedding=True,
+ account_for_loss=True,
+ )
+
+ def test_vpp_standalone_both_valid_mhc(self):
+ """VPP=2, standalone embed+loss, pp=4, 14 layers β (14+2)/4=4, 4/2=2 per VP."""
+ cfg = _make_config(
+ num_layers=14,
+ pp_size=4,
+ vp_size=2,
+ enable_hyper_connections=True,
+ account_for_embedding=True,
+ account_for_loss=True,
+ )
+        # rank 0, vp 0: first PP + first VP → 2 - 1(embed) = 1
+        assert get_num_layers_to_build(cfg, vp_stage=0, pp_rank=0) == 1
+        # rank 0, vp 1: first PP + second VP → 2
+        assert get_num_layers_to_build(cfg, vp_stage=1, pp_rank=0) == 2
+        # rank 1-2: 2 per VP stage
+        for rank in (1, 2):
+            for vp in (0, 1):
+                assert get_num_layers_to_build(cfg, vp_stage=vp, pp_rank=rank) == 2
+        # rank 3, vp 0: 2
+        assert get_num_layers_to_build(cfg, vp_stage=0, pp_rank=3) == 2
+        # rank 3, vp 1: last PP + last VP → 2 - 1(loss) = 1
+ assert get_num_layers_to_build(cfg, vp_stage=1, pp_rank=3) == 1
+
+
+# ===========================================================================
+# 3. TransformerBlock expand/contract — boundary logic
+# ===========================================================================
+
+
+class TestTransformerBlockMHCBoundaries:
+ """
+ Test that TransformerBlock correctly applies input_expand at pre_process
+ and output_contract at the final layernorm stage.
+ These are pure tensor operation tests β no GPU or parallel state needed.
+ """
+
+ @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+ def test_input_expand(self):
+ n = 4
+ s, b, C = 8, 2, 64
+ x = torch.randn(s, b, C, device='cuda')
+ expanded = HyperConnectionModule.input_expand(x, n)
+ assert expanded.shape == (s, b, n * C)
+ # Each stream should be a copy of input
+ for i in range(n):
+ torch.testing.assert_close(expanded[:, :, i * C : (i + 1) * C], x)
+
+ @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+ def test_output_contract(self):
+ n = 4
+ s, b, C = 8, 2, 64
+ x = torch.randn(s, b, n * C, device='cuda')
+ contracted = HyperConnectionModule.output_contract(x, n)
+ assert contracted.shape == (s, b, C)
+ # Should be the mean of all n streams
+ expected = x.view(s, b, n, C).mean(dim=2)
+ torch.testing.assert_close(contracted, expected)
+
+ @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+ def test_expand_then_contract_preserves_shape(self):
+ n = 4
+ s, b, C = 8, 2, 64
+ x = torch.randn(s, b, C, device='cuda')
+ expanded = HyperConnectionModule.input_expand(x, n)
+ contracted = HyperConnectionModule.output_contract(expanded, n)
+ assert contracted.shape == x.shape
+        # expand copies all streams → mean of identical streams = original
+ torch.testing.assert_close(contracted, x)
+
+
+# ===========================================================================
+# 3b. Zero-layer VP stage edge cases with mHC
+# ===========================================================================
+
+
+class TestZeroLayerVPStageWithMHC:
+ """
+ When standalone embedding/loss makes a VP stage have very few (1) transformer
+ layers, verify layer counts stay non-negative.
+ """
+
+ def test_vpp_standalone_embed_first_stage_has_1_layer(self):
+ """First VP stage at first PP rank should have exactly 1 layer (2-1=1)."""
+ cfg = _make_config(
+ num_layers=7,
+ pp_size=2,
+ vp_size=2,
+ enable_hyper_connections=True,
+ account_for_embedding=True,
+ )
+ n = get_num_layers_to_build(cfg, vp_stage=0, pp_rank=0)
+ assert n == 1
+ assert n >= 0
+
+ def test_vpp_standalone_loss_last_stage_has_1_layer(self):
+ """Last VP stage at last PP rank should have exactly 1 layer (2-1=1)."""
+ cfg = _make_config(
+ num_layers=7, pp_size=2, vp_size=2, enable_hyper_connections=True, account_for_loss=True
+ )
+ n = get_num_layers_to_build(cfg, vp_stage=1, pp_rank=1)
+ assert n == 1
+ assert n >= 0
+
+ def test_vpp_standalone_both_boundary_layers(self):
+ """Both first and last VP stages lose a layer, but all counts remain >= 0."""
+ cfg = _make_config(
+ num_layers=14,
+ pp_size=4,
+ vp_size=2,
+ enable_hyper_connections=True,
+ account_for_embedding=True,
+ account_for_loss=True,
+ )
+ for pp_rank in range(4):
+ for vp_stage in range(2):
+ n = get_num_layers_to_build(cfg, vp_stage=vp_stage, pp_rank=pp_rank)
+ assert n >= 0, f"pp_rank={pp_rank}, vp_stage={vp_stage} has {n} < 0 layers"
+
+
+# ===========================================================================
+# 4. VPP tensor_shape — single shape for all chunks
+# ===========================================================================
+
+
+class TestVPPTensorShapeWithMHC:
+ """
+ Verify that the interleaved schedule uses n*C for all P2P communication
+ when mHC is enabled with PP > 1.
+ """
+
+ def test_interleaved_tensor_shape_uses_nstream(self):
+ """Reproduce the logic in forward_backward_pipelining_with_interleaving."""
+ hidden_size = 64
+ n_streams = 4
+ pp_size = 2
+
+ config = SimpleNamespace(
+ hidden_size=hidden_size,
+ enable_hyper_connections=True,
+ num_residual_streams=n_streams,
+ sequence_parallel=False,
+ )
+
+ hidden_dim = config.hidden_size
+ if getattr(config, 'enable_hyper_connections', False) and pp_size > 1:
+ hidden_dim = config.hidden_size * getattr(config, 'num_residual_streams', 1)
+
+ assert hidden_dim == hidden_size * n_streams
+
+ def test_interleaved_tensor_shape_no_mhc(self):
+ """Without mHC, hidden_dim = hidden_size."""
+ hidden_size = 64
+ pp_size = 2
+
+ config = SimpleNamespace(
+ hidden_size=hidden_size, enable_hyper_connections=False, sequence_parallel=False
+ )
+
+ hidden_dim = config.hidden_size
+ if getattr(config, 'enable_hyper_connections', False) and pp_size > 1:
+ hidden_dim = config.hidden_size * getattr(config, 'num_residual_streams', 1)
+
+ assert hidden_dim == hidden_size
+
+ def test_interleaved_tensor_shape_pp1_mhc_no_expand(self):
+ """PP=1 with mHC: no P2P communication needed, no shape change."""
+ hidden_size = 64
+ n_streams = 4
+ pp_size = 1
+
+ config = SimpleNamespace(
+ hidden_size=hidden_size,
+ enable_hyper_connections=True,
+ num_residual_streams=n_streams,
+ sequence_parallel=False,
+ )
+
+ hidden_dim = config.hidden_size
+ if getattr(config, 'enable_hyper_connections', False) and pp_size > 1:
+ hidden_dim = config.hidden_size * getattr(config, 'num_residual_streams', 1)
+
+ assert hidden_dim == hidden_size
+
+
+# ===========================================================================
+# 5. Shape consistency across PP stages with VPP + mHC
+# ===========================================================================
+
+
+class TestPPShapeConsistencyWithMHC:
+ """
+ Verify that send shape from one stage matches recv shape of the next stage.
+ This is critical: a mismatch would cause a hang or crash in P2P communication.
+ """
+
+ def test_pp2_mhc_send_recv_match(self):
+ """Rank 0's send shape must match rank 1's recv shape."""
+ cfg = _make_config(hidden_size=64, pp_size=2, enable_hyper_connections=True)
+ shapes = _get_send_recv_shapes(cfg, 2)
+ assert (
+ shapes[0][0] == shapes[1][1]
+ ), f"rank 0 send {shapes[0][0]} != rank 1 recv {shapes[1][1]}"
+
+ def test_pp4_mhc_all_consecutive_match(self):
+ """For all consecutive stages, send[i] == recv[i+1]."""
+ cfg = _make_config(hidden_size=64, num_layers=8, pp_size=4, enable_hyper_connections=True)
+ shapes = _get_send_recv_shapes(cfg, 4)
+ for i in range(3):
+ assert (
+ shapes[i][0] == shapes[i + 1][1]
+ ), f"rank {i} send {shapes[i][0]} != rank {i+1} recv {shapes[i+1][1]}"
+
+ def test_pp4_no_mhc_all_consecutive_match(self):
+ """Baseline: without mHC, all shapes should be plain hidden_size."""
+ cfg = _make_config(hidden_size=64, num_layers=8, pp_size=4)
+ shapes = _get_send_recv_shapes(cfg, 4)
+ for i in range(3):
+ assert shapes[i][0] == shapes[i + 1][1]
+ assert shapes[i][0] == [(32, 2, 64)]
+
+
+# ===========================================================================
+# 6. Standalone embedding / loss — PP boundary + mHC interaction
+# ===========================================================================
+
+
+class TestStandaloneEmbeddingLossWithMHC:
+ """
+ Verify that standalone embedding/loss configurations interact correctly
+ with mHC tensor shapes and layer counting.
+ """
+
+ def test_standalone_embedding_first_stage_has_fewer_layers(self):
+ """With standalone embedding, first PP/VP stage gets 1 fewer layer."""
+        # 7 layers, pp=2, vp=2 → (7+1)/2=4, 4/2=2 per VP stage
+ cfg = _make_config(
+ num_layers=7,
+ pp_size=2,
+ vp_size=2,
+ enable_hyper_connections=True,
+ account_for_embedding=True,
+ )
+        # rank 0, vp 0: first stage → 2 - 1(embed) = 1
+ assert get_num_layers_to_build(cfg, vp_stage=0, pp_rank=0) == 1
+ # rank 0, vp 1: 2
+ assert get_num_layers_to_build(cfg, vp_stage=1, pp_rank=0) == 2
+ # rank 1: 2 each VP
+ assert get_num_layers_to_build(cfg, vp_stage=0, pp_rank=1) == 2
+ assert get_num_layers_to_build(cfg, vp_stage=1, pp_rank=1) == 2
+
+ def test_standalone_loss_last_stage_has_fewer_layers(self):
+ """With standalone loss, last PP/VP stage gets 1 fewer layer."""
+ cfg = _make_config(
+ num_layers=7, pp_size=2, vp_size=2, enable_hyper_connections=True, account_for_loss=True
+ )
+ # (7+1)/2 = 4, 4/2 = 2 per VP
+ # rank 0: 2 each VP
+ assert get_num_layers_to_build(cfg, vp_stage=0, pp_rank=0) == 2
+ assert get_num_layers_to_build(cfg, vp_stage=1, pp_rank=0) == 2
+ # rank 1, vp 0: 2
+ assert get_num_layers_to_build(cfg, vp_stage=0, pp_rank=1) == 2
+        # rank 1, vp 1: last stage → 2 - 1(loss) = 1
+ assert get_num_layers_to_build(cfg, vp_stage=1, pp_rank=1) == 1
+
+ def test_standalone_both_mhc_shapes_still_consistent(self):
+ """With standalone embed+loss, P2P shapes should still match between stages."""
+ cfg = _make_config(
+ hidden_size=64,
+ num_layers=14,
+ pp_size=4,
+ enable_hyper_connections=True,
+ num_residual_streams=4,
+ account_for_embedding=True,
+ account_for_loss=True,
+ )
+ tp, cp = _make_tp_cp_groups()
+ for i in range(3):
+ send = get_tensor_shapes(
+ seq_length=32,
+ micro_batch_size=2,
+ decoder_seq_length=None,
+ config=cfg,
+ tp_group=tp,
+ cp_group=cp,
+ pp_group=_make_pp_group(i, 4),
+ is_recv=False,
+ )
+ recv = get_tensor_shapes(
+ seq_length=32,
+ micro_batch_size=2,
+ decoder_seq_length=None,
+ config=cfg,
+ tp_group=tp,
+ cp_group=cp,
+ pp_group=_make_pp_group(i + 1, 4),
+ is_recv=True,
+ )
+ assert send == recv, f"rank {i}->{i+1}: send={send} recv={recv}"
+
+ def test_mhc_shapes_first_stage_send_vs_second_recv(self):
+ """
+ First stage (pre_process) does input_expand: hidden [s,b,C] -> [s,b,n*C].
+ The send shape from rank 0 should be n*C.
+ The recv shape at rank 1 should also be n*C.
+ """
+ H, N = 64, 4
+ cfg = _make_config(
+ hidden_size=H,
+ num_layers=8,
+ pp_size=2,
+ enable_hyper_connections=True,
+ num_residual_streams=N,
+ )
+ tp, cp = _make_tp_cp_groups()
+ send_0 = get_tensor_shapes(
+ seq_length=32,
+ micro_batch_size=2,
+ decoder_seq_length=None,
+ config=cfg,
+ tp_group=tp,
+ cp_group=cp,
+ pp_group=_make_pp_group(0, 2),
+ is_recv=False,
+ )
+ recv_1 = get_tensor_shapes(
+ seq_length=32,
+ micro_batch_size=2,
+ decoder_seq_length=None,
+ config=cfg,
+ tp_group=tp,
+ cp_group=cp,
+ pp_group=_make_pp_group(1, 2),
+ is_recv=True,
+ )
+ assert send_0 == [(32, 2, H * N)]
+ assert recv_1 == [(32, 2, H * N)]
+ assert send_0 == recv_1
+
+ def test_mhc_shapes_last_stage_output_is_1stream(self):
+ """
+ Last stage (post_process) does output_contract: [s,b,n*C] -> [s,b,C].
+ get_tensor_shapes therefore reports C for the last rank; that rank has no
+ forward send, so the value documents the contracted output shape.
+ """
+ H, N = 64, 4
+ cfg = _make_config(
+ hidden_size=H,
+ num_layers=8,
+ pp_size=2,
+ enable_hyper_connections=True,
+ num_residual_streams=N,
+ )
+ tp, cp = _make_tp_cp_groups()
+ send_last = get_tensor_shapes(
+ seq_length=32,
+ micro_batch_size=2,
+ decoder_seq_length=None,
+ config=cfg,
+ tp_group=tp,
+ cp_group=cp,
+ pp_group=_make_pp_group(1, 2),
+ is_recv=False,
+ )
+ # Last stage sends C (after contract), not n*C
+ assert send_last == [(32, 2, H)]
+
+
+# ===========================================================================
+# 7. E2E forward pass tests (require multi-GPU)
+# ===========================================================================
+
+
+@pytest.mark.internal
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+@pytest.mark.skipif(
+ int(__import__('os').environ.get('WORLD_SIZE', '1')) < 2, reason="Requires at least 2 GPUs"
+)
+class TestPPForwardWithMHC:
+ """
+ End-to-end forward pass tests with PP + mHC.
+ Requires multi-GPU (torchrun --nproc-per-node=2+).
+ """
+
+ def _run_forward(
+ self, pp_size, vp_size, enable_mhc, account_for_embedding=False, account_for_loss=False
+ ):
+ from megatron.core import mpu
+ from megatron.core.models.gpt.gpt_layer_specs import (
+ get_gpt_layer_with_transformer_engine_spec,
+ )
+ from megatron.core.models.gpt.gpt_model import GPTModel
+ from megatron.core.num_microbatches_calculator import (
+ init_num_microbatches_calculator,
+ unset_num_microbatches_calculator,
+ )
+ from megatron.core.pipeline_parallel import get_forward_backward_func
+ from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed
+ from megatron.core.transformer.enums import ModelType
+ from megatron.training.global_vars import set_args
+ from tests.unit_tests.test_utilities import Utils
+
+ num_layers = 8
+ hidden_size = 64
+ num_heads = 4
+ seq_length = 16
+ micro_batch_size = 2
+ vocab_size = 128
+
+ Utils.initialize_model_parallel(1, pp_size, vp_size)
+ model_parallel_cuda_manual_seed(42)
+ init_num_microbatches_calculator(0, None, 1, 1, 1)
+
+ try:
+ config = TransformerConfig(
+ num_layers=num_layers,
+ hidden_size=hidden_size,
+ num_attention_heads=num_heads,
+ use_cpu_initialization=True,
+ pipeline_dtype=torch.bfloat16,
+ bf16=True,
+ pipeline_model_parallel_size=pp_size,
+ virtual_pipeline_model_parallel_size=vp_size,
+ enable_hyper_connections=enable_mhc,
+ num_residual_streams=4 if enable_mhc else 1,
+ account_for_embedding_in_pipeline_split=account_for_embedding,
+ account_for_loss_in_pipeline_split=account_for_loss,
+ hidden_dropout=0.0,
+ attention_dropout=0.0,
+ )
+
+ spec = get_gpt_layer_with_transformer_engine_spec(enable_hyper_connection=enable_mhc)
+
+ models = []
+ for i in range(vp_size or 1):
+ pre_process = mpu.is_pipeline_first_stage(ignore_virtual=False, vp_stage=i)
+ post_process = mpu.is_pipeline_last_stage(ignore_virtual=False, vp_stage=i)
+ m = (
+ GPTModel(
+ config=config,
+ transformer_layer_spec=spec,
+ vocab_size=vocab_size,
+ max_sequence_length=seq_length,
+ pre_process=pre_process,
+ post_process=post_process,
+ position_embedding_type="rope",
+ vp_stage=i,
+ share_embeddings_and_output_weights=False,
+ )
+ .bfloat16()
+ .cuda()
+ )
+ m.model_type = ModelType.encoder_or_decoder
+ models.append(m)
+
+ if vp_size is None:
+ models = models[0]
+ model_list = [models]
+ else:
+ model_list = models
+
+ def forward_step_func(data_iterator, model):
+ tokens = torch.randint(0, vocab_size, (micro_batch_size, seq_length)).cuda()
+ position_ids = (
+ torch.arange(seq_length).unsqueeze(0).expand(micro_batch_size, -1).cuda()
+ )
+ labels = torch.randint(0, vocab_size, (micro_batch_size, seq_length)).cuda()
+ output = model(tokens, position_ids, None, labels=labels)
+
+ def loss_func(output_tensor):
+ loss = output_tensor.sum()
+ return output_tensor, loss
+
+ return output, loss_func
+
+ forward_backward_func = get_forward_backward_func()
+
+ def make_iter():
+ while True:
+ yield None
+
+ data_iters = [make_iter()] * len(model_list)
+
+ losses = forward_backward_func(
+ forward_step_func=forward_step_func,
+ data_iterator=data_iters,
+ model=model_list,
+ num_microbatches=4,
+ seq_length=seq_length,
+ micro_batch_size=micro_batch_size,
+ forward_only=True,
+ )
+ return losses
+
+ finally:
+ unset_num_microbatches_calculator()
+ Utils.destroy_model_parallel()
+
+ def test_pp2_mhc_forward(self):
+ """PP=2 + mHC forward pass should not hang."""
+ self._run_forward(pp_size=2, vp_size=None, enable_mhc=True)
+
+ def test_pp2_vpp2_mhc_forward(self):
+ """PP=2 + VPP=2 + mHC forward pass should not hang."""
+ self._run_forward(pp_size=2, vp_size=2, enable_mhc=True)
+
+ def test_pp2_mhc_standalone_embedding_forward(self):
+ """PP=2 + mHC + standalone embedding."""
+ # (8+1)/2 = 4.5 -> need (num_layers+1) divisible by pp_size
+ # Use default 8 layers, won't divide evenly. Skip standalone embedding
+ # with 8 layers pp=2 as (8+1)/2 isn't integer.
+ # The test framework should raise ValueError, confirming the validation.
+ with pytest.raises((ValueError, AssertionError)):
+ self._run_forward(pp_size=2, vp_size=None, enable_mhc=True, account_for_embedding=True)
+
+ def test_pp2_mhc_standalone_both_forward(self):
+ """PP=2 + mHC + standalone embedding + loss: (8+2)/2=5, works."""
+ self._run_forward(
+ pp_size=2,
+ vp_size=None,
+ enable_mhc=True,
+ account_for_embedding=True,
+ account_for_loss=True,
+ )
+
+ def test_pp2_no_mhc_forward_baseline(self):
+ """Baseline: PP=2 without mHC should work fine."""
+ self._run_forward(pp_size=2, vp_size=None, enable_mhc=False)
+
+
+# ===========================================================================
+# 8. Flexible VPP layout (pipeline_model_parallel_layout) + mHC
+# ===========================================================================
+
+
+def _make_layout_config(
+ hidden_size=64,
+ num_layers=8,
+ pp_size=2,
+ layout=None,
+ enable_hyper_connections=False,
+ num_residual_streams=4,
+ **extra,
+):
+ """Build a TransformerConfig with a flexible VPP layout for testing.
+
+ Unlike _make_config, this uses pipeline_model_parallel_layout instead of
+ account_for_embedding/loss flags, since they are mutually exclusive.
+ """
+ kwargs = dict(
+ hidden_size=hidden_size,
+ num_layers=num_layers,
+ num_attention_heads=4,
+ pipeline_model_parallel_size=pp_size,
+ pipeline_model_parallel_layout=layout,
+ pipeline_dtype=torch.bfloat16,
+ enable_hyper_connections=enable_hyper_connections,
+ num_residual_streams=num_residual_streams,
+ use_cpu_initialization=True,
+ )
+ kwargs.update(extra)
+ return TransformerConfig(**kwargs)
+
+
+class TestFlexibleVPPLayoutLayerCountsWithMHC:
+ """
+ Verify get_num_layers_to_build returns correct layer counts when
+ flexible VPP layout (pipeline_model_parallel_layout) is combined with mHC.
+ mHC itself doesn't change layer counts, so these tests confirm the
+ combination doesn't break anything.
+ """
+
+ def setup_method(self, method):
+ pass
+
+ def teardown_method(self, method):
+ parallel_state.set_pipeline_model_parallel_world_size(None)
+ parallel_state.set_virtual_pipeline_model_parallel_world_size(None)
+
+ def test_pp2_vpp2_standalone_embed_loss_mhc(self):
+ """PP=2, VPP=2: standalone embedding & loss on separate VP stages."""
+ # Layout: [["embedding"], ["decoder"]*6, ["decoder"], ["loss"]]
+ # PP=2, VPP=2 -> 4 stages:
+ # PP0 VP0: ["embedding"] -> 0 decoders
+ # PP1 VP0: ["decoder"]*6 -> 6 decoders
+ # PP0 VP1: ["decoder"] -> 1 decoder
+ # PP1 VP1: ["loss"] -> 0 decoders
+ layout = [["embedding"], ["decoder"] * 6, ["decoder"], ["loss"]]
+ Utils.fake_initialize_model_parallel(
+ pipeline_model_parallel_size=2, virtual_pipeline_model_parallel_size=2
+ )
+ cfg = _make_layout_config(
+ num_layers=7,
+ pp_size=2,
+ layout=layout,
+ enable_hyper_connections=True,
+ num_residual_streams=4,
+ )
+
+ expected = {(0, 0): 0, (0, 1): 1, (1, 0): 6, (1, 1): 0}
+ total = 0
+ for pp_rank in range(2):
+ parallel_state.set_pipeline_model_parallel_rank(pp_rank)
+ for vp in range(2):
+ n = get_num_layers_to_build(cfg, vp_stage=vp)
+ assert (
+ n == expected[(pp_rank, vp)]
+ ), f"pp_rank={pp_rank}, vp={vp}: expected {expected[(pp_rank, vp)]}, got {n}"
+ total += n
+ assert total == 7
+
+ def test_pp2_vpp2_even_split_mhc(self):
+ """PP=2, VPP=2: even split with embedding/loss attached to decoder stages."""
+ # Layout: [["embedding","decoder","decoder"], ["decoder"]*4,
+ # ["decoder"], ["decoder","loss"]]
+ # PP0 VP0: ["embedding","decoder","decoder"] -> 2 decoders
+ # PP1 VP0: ["decoder"]*4 -> 4 decoders
+ # PP0 VP1: ["decoder"] -> 1 decoder
+ # PP1 VP1: ["decoder","loss"] -> 1 decoder
+ layout = [
+ ["embedding", "decoder", "decoder"],
+ ["decoder"] * 4,
+ ["decoder"],
+ ["decoder", "loss"],
+ ]
+ Utils.fake_initialize_model_parallel(
+ pipeline_model_parallel_size=2, virtual_pipeline_model_parallel_size=2
+ )
+ cfg = _make_layout_config(
+ num_layers=8, pp_size=2, layout=layout, enable_hyper_connections=True
+ )
+
+ expected = {(0, 0): 2, (0, 1): 1, (1, 0): 4, (1, 1): 1}
+ total = 0
+ for pp_rank in range(2):
+ parallel_state.set_pipeline_model_parallel_rank(pp_rank)
+ for vp in range(2):
+ n = get_num_layers_to_build(cfg, vp_stage=vp)
+ assert (
+ n == expected[(pp_rank, vp)]
+ ), f"pp_rank={pp_rank}, vp={vp}: expected {expected[(pp_rank, vp)]}, got {n}"
+ total += n
+ assert total == 8
+
+ def test_pp2_vpp2_empty_stage_mhc(self):
+ """PP=2, VPP=2: empty VP stage (standalone embedding) with mHC."""
+ # Layout: [["embedding"], ["decoder"]*7, [], ["loss"]]
+ # PP0 VP0: ["embedding"] -> 0 decoders
+ # PP1 VP0: ["decoder"]*7 -> 7 decoders
+ # PP0 VP1: [] -> 0 decoders
+ # PP1 VP1: ["loss"] -> 0 decoders
+ layout = [["embedding"], ["decoder"] * 7, [], ["loss"]]
+ Utils.fake_initialize_model_parallel(
+ pipeline_model_parallel_size=2, virtual_pipeline_model_parallel_size=2
+ )
+ cfg = _make_layout_config(
+ num_layers=7, pp_size=2, layout=layout, enable_hyper_connections=True
+ )
+
+ expected = {(0, 0): 0, (0, 1): 0, (1, 0): 7, (1, 1): 0}
+ for pp_rank in range(2):
+ parallel_state.set_pipeline_model_parallel_rank(pp_rank)
+ for vp in range(2):
+ n = get_num_layers_to_build(cfg, vp_stage=vp)
+ assert n == expected[(pp_rank, vp)]
+ assert n >= 0
+
+ def test_mhc_does_not_alter_layout_layer_counts(self):
+ """Same layout gives identical layer counts with and without mHC."""
+ layout = [
+ ["embedding", "decoder", "decoder"],
+ ["decoder"] * 4,
+ ["decoder"],
+ ["decoder", "loss"],
+ ]
+ Utils.fake_initialize_model_parallel(
+ pipeline_model_parallel_size=2, virtual_pipeline_model_parallel_size=2
+ )
+ cfg_mhc = _make_layout_config(
+ num_layers=8, pp_size=2, layout=layout, enable_hyper_connections=True
+ )
+ cfg_no_mhc = _make_layout_config(
+ num_layers=8, pp_size=2, layout=layout, enable_hyper_connections=False
+ )
+
+ for pp_rank in range(2):
+ parallel_state.set_pipeline_model_parallel_rank(pp_rank)
+ for vp in range(2):
+ n_mhc = get_num_layers_to_build(cfg_mhc, vp_stage=vp)
+ n_no_mhc = get_num_layers_to_build(cfg_no_mhc, vp_stage=vp)
+ assert (
+ n_mhc == n_no_mhc
+ ), f"pp_rank={pp_rank}, vp={vp}: mHC={n_mhc} != no-mHC={n_no_mhc}"
+
+
+class TestFlexibleVPPLayoutShapeConsistencyWithMHC:
+ """
+ Verify that P2P tensor shapes are consistent (send == recv) between
+ consecutive PP stages when using flexible VPP layout + mHC.
+ This is critical: a shape mismatch causes hangs or crashes.
+ """
+
+ def test_pp2_flexible_vpp_mhc_send_recv_match(self):
+ """PP=2 with flexible VPP layout + mHC: rank 0 send == rank 1 recv."""
+ H, N = 64, 4
+ cfg = _make_layout_config(
+ hidden_size=H,
+ num_layers=7,
+ pp_size=2,
+ layout=[["embedding"], ["decoder"] * 6, ["decoder"], ["loss"]],
+ enable_hyper_connections=True,
+ num_residual_streams=N,
+ )
+ shapes = _get_send_recv_shapes(cfg, pp_size=2)
+ assert (
+ shapes[0][0] == shapes[1][1]
+ ), f"rank 0 send {shapes[0][0]} != rank 1 recv {shapes[1][1]}"
+ # rank 0 (first) sends n*C
+ assert shapes[0][0] == [(32, 2, H * N)]
+ # rank 1 (last) sends C
+ assert shapes[1][0] == [(32, 2, H)]
+
+ def test_pp4_flexible_vpp_mhc_all_consecutive_match(self):
+ """PP=4 with flexible VPP layout + mHC: send[i] == recv[i+1] for all i."""
+ H, N = 64, 4
+ layout = [
+ ["embedding"],
+ ["decoder"] * 2,
+ ["decoder"],
+ ["decoder"],
+ ["decoder"],
+ ["decoder"],
+ ["decoder"],
+ ["decoder", "loss"],
+ ]
+ cfg = _make_layout_config(
+ hidden_size=H,
+ num_layers=8,
+ pp_size=4,
+ layout=layout,
+ enable_hyper_connections=True,
+ num_residual_streams=N,
+ )
+ shapes = _get_send_recv_shapes(cfg, pp_size=4)
+ for i in range(3):
+ assert (
+ shapes[i][0] == shapes[i + 1][1]
+ ), f"rank {i} send {shapes[i][0]} != rank {i+1} recv {shapes[i+1][1]}"
+
+ # First stage sends n*C, intermediate stages send/recv n*C, last stage sends C
+ assert shapes[0][0] == [(32, 2, H * N)]
+ for i in (1, 2):
+ assert shapes[i][0] == [(32, 2, H * N)]
+ assert shapes[i][1] == [(32, 2, H * N)]
+ assert shapes[3][0] == [(32, 2, H)]
+ assert shapes[3][1] == [(32, 2, H * N)]
+
+ def test_pp2_flexible_vpp_no_mhc_baseline(self):
+ """Baseline: PP=2 with flexible VPP layout, no mHC β all shapes are C."""
+ H = 64
+ cfg = _make_layout_config(
+ hidden_size=H,
+ num_layers=7,
+ pp_size=2,
+ layout=[["embedding"], ["decoder"] * 6, ["decoder"], ["loss"]],
+ enable_hyper_connections=False,
+ )
+ shapes = _get_send_recv_shapes(cfg, pp_size=2)
+ for i in range(1):
+ assert shapes[i][0] == shapes[i + 1][1]
+ assert shapes[i][0] == [(32, 2, H)]
+
+ def test_pp4_flexible_vpp_mhc_uneven_layers_shape_consistent(self):
+ """Highly uneven layout: shapes must still match between stages."""
+ H, N = 64, 4
+ layout = [["embedding", "decoder"], ["decoder"] * 5, ["decoder"], ["decoder", "loss"]]
+ cfg = _make_layout_config(
+ hidden_size=H,
+ num_layers=8,
+ pp_size=2,
+ layout=layout,
+ enable_hyper_connections=True,
+ num_residual_streams=N,
+ )
+ shapes = _get_send_recv_shapes(cfg, pp_size=2)
+ assert (
+ shapes[0][0] == shapes[1][1]
+ ), f"rank 0 send {shapes[0][0]} != rank 1 recv {shapes[1][1]}"
diff --git a/tests/unit_tests/ssm/test_gated_delta_net.py b/tests/unit_tests/ssm/test_gated_delta_net.py
index 1ccc70a2327..7d65af8a95e 100644
--- a/tests/unit_tests/ssm/test_gated_delta_net.py
+++ b/tests/unit_tests/ssm/test_gated_delta_net.py
@@ -1,5 +1,6 @@
-# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+from functools import partial
from unittest import mock
import pytest
@@ -30,6 +31,8 @@
init_checkpointing_mock_args,
)
from tests.unit_tests.test_utilities import Utils
+from tests.unit_tests.transformer.test_attention import _test_parallel_attention_correctness
+from tests.unit_tests.transformer.test_multi_latent_attention import make_test_packed_seq_params
try:
import fla
@@ -41,12 +44,7 @@
@pytest.mark.parametrize(
("tp_size", "sp", "cp_size"),
- [
- (1, False, 1),
- (2, False, 1),
- (2, True, 1),
- # GDN does not support CP for now. Leave it for future work.
- ],
+ [(1, False, 1), (2, False, 1), (2, True, 1), (1, False, 2), (2, False, 2), (2, True, 2)],
)
@pytest.mark.skipif(not HAVE_FLA, reason="FLA is not installed.")
@pytest.mark.internal
@@ -141,58 +139,127 @@ def test_gpu_forward(self):
output.dtype == hidden_states.dtype
), f"Output dtype {output.dtype=} mismatch with {hidden_states.dtype=}"
+ def test_jit_compiled_helpers(self):
+ import torch._dynamo
+
+ gdn = self.gdn
+ batch = 2
+ seq_len = 16
+
+ num_v_heads_local = gdn.num_value_heads // gdn.tp_size // gdn.cp_size
+
+ qkv_last_dim = (2 * gdn.qk_dim_local_tp + gdn.v_dim_local_tp) // gdn.cp_size
+ qkv = torch.randn(
+ batch, seq_len, qkv_last_dim, device=torch.cuda.current_device(), dtype=torch.bfloat16
+ )
+ gate = torch.randn(
+ batch,
+ seq_len,
+ num_v_heads_local,
+ gdn.value_head_dim,
+ device=torch.cuda.current_device(),
+ dtype=torch.bfloat16,
+ )
+ beta = torch.randn(
+ batch,
+ seq_len,
+ num_v_heads_local,
+ device=torch.cuda.current_device(),
+ dtype=torch.bfloat16,
+ )
+ alpha = torch.randn(
+ batch,
+ seq_len,
+ num_v_heads_local,
+ device=torch.cuda.current_device(),
+ dtype=torch.bfloat16,
+ )
+
+ # Disable dynamo so coverage.py can trace through the method bodies,
+ # which are normally wrapped by @jit_fuser (torch.compile).
+ with torch._dynamo.config.patch(disable=True):
+ query, key, value, gate_out, beta_out, alpha_out = (
+ gdn._prepare_qkv_for_gated_delta_rule(qkv, gate, beta, alpha, batch, seq_len)
+ )
+
+ assert query.shape == (batch, seq_len, num_v_heads_local, gdn.key_head_dim)
+ assert key.shape == (batch, seq_len, num_v_heads_local, gdn.key_head_dim)
+ assert value.shape == (batch, seq_len, num_v_heads_local, gdn.value_head_dim)
+ assert query.is_contiguous()
+ assert key.is_contiguous()
+ assert value.is_contiguous()
+
+ A_log_mock = torch.randn(
+ num_v_heads_local, device=torch.cuda.current_device(), dtype=torch.bfloat16
+ )
+ dt_bias_mock = torch.randn(
+ num_v_heads_local, device=torch.cuda.current_device(), dtype=torch.bfloat16
+ )
+
+ with torch._dynamo.config.patch(disable=True):
+ g, beta_sig = gdn._compute_g_and_beta(A_log_mock, dt_bias_mock, alpha, beta)
+
+ assert g.dtype == torch.float32
+ assert g.shape == alpha.shape
+ assert beta_sig.shape == beta.shape
+
+ def test_gpu_forward_thd_correctness(self):
+ if self.sp_size > 1:
+ pytest.skip("Sequence parallel is not supported for this test case.")
+
+ atol, rtol = 3e-4, 3e-4
+
+ # Input shape
+ sequence_length = 32
+ micro_batch_size = 4
+ cu_seqlens = [0, 32, 64, 96, 128]
+ # sbhd input shape: [sequence length, batch size, hidden size]
+ sub_sequence_length = sequence_length // self.cp_size
+ hidden_states_sbhd = torch.rand(
+ (sub_sequence_length, micro_batch_size, self.gdn.config.hidden_size)
+ )
+ attention_mask_sbhd = None
+ hidden_states_sbhd = hidden_states_sbhd.cuda().bfloat16()
+ # thd input shape: [sequence length * batch size, 1, hidden size]
+ hidden_states_thd = hidden_states_sbhd.transpose(0, 1).contiguous()
+ hidden_states_thd = hidden_states_thd.view(-1, 1, self.gdn.config.hidden_size)
+ attention_mask_thd = None
+ packed_seq_params = make_test_packed_seq_params(cu_seqlens=cu_seqlens)
+
+ # THD format
+ output_thd, _ = self.gdn(
+ hidden_states_thd, attention_mask_thd, packed_seq_params=packed_seq_params
+ )
+ # SBHD format
+ output_sbhd, _ = self.gdn(hidden_states_sbhd, attention_mask_sbhd)
+ output_sbhd_T = output_sbhd.transpose(0, 1).contiguous().view(*output_thd.shape)
+
+ rank = torch.distributed.get_rank()
+ assert output_thd.shape[0] == sub_sequence_length * micro_batch_size
+ assert output_thd.shape[1] == 1
+ assert output_thd.shape[2] == self.gdn.config.hidden_size
+ torch.testing.assert_close(
+ output_sbhd_T,
+ output_thd,
+ atol=atol,
+ rtol=rtol,
+ msg=lambda msg: f"Output mismatch ({rank=}): {msg}",
+ )
+
+@pytest.mark.parametrize("sequence_packing", [False, True])
@pytest.mark.parametrize(
("tp", "sp", "cp"),
[
(4, False, 1), # TP w/o SP
(4, True, 1), # TP w/ SP
- # CP does not support GDN for now. Add it once it is supported.
+ (1, False, 2), # CP
+ (2, False, 2), # TP w/o SP + CP
+ (2, True, 2), # TP w/ SP + CP
],
)
@pytest.mark.skipif(not HAVE_FLA, reason="FLA is not installed.")
-def test_parallel_gated_delta_net_correctness(tmp_path_dist_ckpt, tp, sp, cp):
- # Constants
- seed = 123
- sequence_length = 256
- micro_batch_size = 4
- hidden_size = 128
-
- # Model initialization function
- def initialize_gpt_model(
- config, pre_process=True, post_process=True, vp_stage=None, pg_collection=None
- ):
- layer_spec = get_transformer_block_with_experimental_attention_variant_spec(
- config=config, vp_stage=None, pp_rank=None
- )
- gpt_model = GPTModel(
- config=config,
- transformer_layer_spec=layer_spec,
- vocab_size=128,
- max_sequence_length=sequence_length,
- pre_process=pre_process,
- post_process=post_process,
- vp_stage=vp_stage,
- pg_collection=pg_collection,
- )
- return gpt_model
-
- # Initialize baseline parallel state
- Utils.initialize_model_parallel(
- tensor_model_parallel_size=1, pipeline_model_parallel_size=1, context_parallel_size=1
- )
-
- # Initialize input hidden states
- torch.manual_seed(seed)
- model_parallel_cuda_manual_seed(seed)
- input_hidden_states = (
- torch.rand((sequence_length, micro_batch_size, hidden_size))
- .cuda()
- .bfloat16()
- .requires_grad_(True)
- )
-
- # Initialize transformer config
+def test_parallel_gated_delta_net_correctness(tmp_path_dist_ckpt, sequence_packing, tp, sp, cp):
transformer_config = TransformerConfig(
hidden_size=128,
linear_conv_kernel_dim=2,
@@ -212,118 +279,26 @@ def initialize_gpt_model(
transformer_impl="transformer_engine",
)
- with TempNamedDir(tmp_path_dist_ckpt / 'test_parallel_gdn', sync=True) as ckpt_dir:
- # Set argument
- mock_args = parse_args(ignore_unknown_args=True)
- set_args(mock_args)
-
- # Initialize baseline model
- init_basic_mock_args(mock_args, 1, 1, bf16=True)
- mock_args.context_parallel_size = 1
- mock_args.sequence_parallel = 1
- gpt_model = unwrap_model(get_model(initialize_gpt_model, config=transformer_config))
-
- # Initialize args and save checkpoint
- init_checkpointing_mock_args(mock_args, ckpt_dir, False)
- mock_args.no_save_optim = True
- mock_args.no_save_rng = True
- mock_args.no_load_optim = True
- mock_args.no_load_rng = True
- save_checkpoint(10, gpt_model, None, None, 0)
-
- # Calculate baseline output
- attention = gpt_model[0].decoder.layers[0].self_attention
- output_hidden_states_baseline, bias_hidden_states_baseline = attention(
- input_hidden_states, attention_mask=None
- )
- output_hidden_states_baseline.sum().backward()
-
- # Save baseline output
- input_grad_baseline = input_hidden_states.grad.detach()
- output_hidden_states_baseline = output_hidden_states_baseline.detach()
-
- # Initialize parallel model
- Utils.destroy_model_parallel()
- Utils.initialize_model_parallel(
- tensor_model_parallel_size=tp, pipeline_model_parallel_size=1, context_parallel_size=cp
- )
- torch.manual_seed(seed)
- model_parallel_cuda_manual_seed(seed)
- transformer_config.context_parallel_size = cp
- transformer_config.tensor_model_parallel_size = tp
- transformer_config.sequence_parallel = sp
- init_basic_mock_args(mock_args, tp, 1, bf16=True)
- mock_args.context_parallel_size = cp
- mock_args.sequence_parallel = sp
- pg_collection = ProcessGroupCollection.use_mpu_process_groups()
- pg_collection.embd = parallel_state.get_embedding_group()
- gpt_model = unwrap_model(
- get_model(initialize_gpt_model, config=transformer_config, pg_collection=pg_collection)
- )
- with mock.patch('megatron.training.checkpointing.check_checkpoint_args'):
- with mock.patch('megatron.training.checkpointing.update_num_microbatches'):
- load_checkpoint(gpt_model, None, None)
-
- # Function to get tensor on this tp and cp rank
- cp_group = parallel_state.get_context_parallel_group()
- tp_rank = parallel_state.get_tensor_model_parallel_rank()
-
- def get_tensor_on_this_rank(tensor):
- if cp > 1:
- tensor = get_tensor_on_this_cp_rank(tensor, 0, cp_group)
- if tp > 1 and sp:
- sp_seg = sequence_length // tp // cp
- tensor = tensor[tp_rank * sp_seg : (tp_rank + 1) * sp_seg]
- return tensor
-
- # Calculate parallel model output
- input_hidden_states = get_tensor_on_this_rank(input_hidden_states)
- input_hidden_states = input_hidden_states.detach().requires_grad_(True)
- parallel_attention = gpt_model[0].decoder.layers[0].self_attention
- output_hidden_states_parallel, bias_hidden_states_parallel = parallel_attention(
- input_hidden_states, attention_mask=None
- )
- output_hidden_states_parallel.sum().backward()
- input_grad_parallel = input_hidden_states.grad.detach()
-
- # Check if the output is the same
- if cp:
- atol, rtol = 5e-3, 5e-3
- else:
- atol, rtol = 5e-4, 5e-4
- output_hidden_states_baseline = get_tensor_on_this_rank(output_hidden_states_baseline)
- input_grad_baseline = get_tensor_on_this_rank(input_grad_baseline)
-
- assert torch.all(
- ~torch.isnan(output_hidden_states_baseline)
- ), "output_hidden_states_baseline contains nan"
- assert torch.all(
- ~torch.isinf(output_hidden_states_baseline)
- ), "output_hidden_states_baseline contains inf"
- assert torch.all(~torch.isnan(input_grad_baseline)), "input_grad_baseline contains nan"
- assert torch.all(~torch.isinf(input_grad_baseline)), "input_grad_baseline contains inf"
- assert torch.all(
- ~torch.isnan(output_hidden_states_parallel)
- ), "output_hidden_states_parallel contains nan"
- assert torch.all(
- ~torch.isinf(output_hidden_states_parallel)
- ), "output_hidden_states_parallel contains inf"
- assert torch.all(~torch.isnan(input_grad_parallel)), "input_grad_parallel contains nan"
- assert torch.all(~torch.isinf(input_grad_parallel)), "input_grad_parallel contains inf"
-
- torch.testing.assert_close(
- output_hidden_states_baseline,
- output_hidden_states_parallel,
- atol=atol,
- rtol=rtol,
- msg=lambda msg: f"Mismatch in output_hidden_states: {msg}",
- )
- torch.testing.assert_close(
- input_grad_baseline,
- input_grad_parallel,
- atol=atol,
- rtol=rtol,
- msg=lambda msg: f"Mismatch in input_grad: {msg}",
- )
+ transformer_layer_spec = get_transformer_block_with_experimental_attention_variant_spec(
+ config=transformer_config, vp_stage=None, pp_rank=0
+ )
- Utils.destroy_model_parallel()
+ if cp:
+ atol, rtol = 5e-3, 5e-3
+ else:
+ atol, rtol = 5e-4, 5e-4
+
+ _test_parallel_attention_correctness(
+ transformer_config=transformer_config,
+ transformer_layer_spec=transformer_layer_spec,
+ tmp_path_dist_ckpt=tmp_path_dist_ckpt,
+ atol=atol,
+ rtol=rtol,
+ tp=tp,
+ sp=sp,
+ cp=cp,
+ seed=123,
+ sequence_length=256,
+ micro_batch_size=4,
+ sequence_packing=sequence_packing,
+ )
diff --git a/tests/unit_tests/tensor_parallel/test_tp_attrs_without_init.py b/tests/unit_tests/tensor_parallel/test_tp_attrs_without_init.py
new file mode 100644
index 00000000000..f7a518e8e88
--- /dev/null
+++ b/tests/unit_tests/tensor_parallel/test_tp_attrs_without_init.py
@@ -0,0 +1,87 @@
+import pytest
+import torch
+
+from megatron.core.tensor_parallel.layers import (
+ ColumnParallelLinear,
+ RowParallelLinear,
+ VocabParallelEmbedding,
+)
+from megatron.core.transformer.transformer_config import TransformerConfig
+from tests.unit_tests.test_utilities import Utils
+
+
+class TestTPAttributesWithoutInitialization:
+
+ def teardown_method(self, method):
+ Utils.destroy_model_parallel()
+
+ @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+ @pytest.mark.parametrize("use_cpu_init", [True, False])
+ def test_vocab_parallel_embedding_tp_attrs_no_init(self, use_cpu_init):
+ Utils.initialize_model_parallel(tensor_model_parallel_size=2)
+ cfg = TransformerConfig(
+ num_layers=1,
+ hidden_size=8,
+ num_attention_heads=4,
+ use_cpu_initialization=use_cpu_init,
+ perform_initialization=False,
+ )
+
+ emb = VocabParallelEmbedding(
+ num_embeddings=16, embedding_dim=8, init_method=cfg.init_method, config=cfg
+ )
+ w = emb.weight
+ assert hasattr(w, "tensor_model_parallel") and w.tensor_model_parallel is True
+ assert hasattr(w, "partition_dim") and w.partition_dim == 0
+ assert hasattr(w, "partition_stride") and w.partition_stride == 1
+
+ @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+ @pytest.mark.parametrize("use_cpu_init", [True, False])
+ def test_column_parallel_linear_tp_attrs_no_init(self, use_cpu_init):
+ Utils.initialize_model_parallel(tensor_model_parallel_size=2)
+ cfg = TransformerConfig(
+ num_layers=1,
+ hidden_size=8,
+ num_attention_heads=4,
+ use_cpu_initialization=use_cpu_init,
+ perform_initialization=False,
+ )
+
+ layer = ColumnParallelLinear(
+ input_size=8,
+ output_size=8,
+ init_method=cfg.init_method,
+ bias=True,
+ config=cfg,
+ skip_bias_add=False,
+ )
+ w = layer.weight
+ assert hasattr(w, "tensor_model_parallel") and w.tensor_model_parallel is True
+ assert hasattr(w, "partition_dim") and w.partition_dim == 0
+ assert hasattr(w, "partition_stride") and w.partition_stride == 1
+
+ @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+ @pytest.mark.parametrize("use_cpu_init", [True, False])
+ def test_row_parallel_linear_tp_attrs_no_init(self, use_cpu_init):
+ Utils.initialize_model_parallel(tensor_model_parallel_size=2)
+ cfg = TransformerConfig(
+ num_layers=1,
+ hidden_size=8,
+ num_attention_heads=4,
+ use_cpu_initialization=use_cpu_init,
+ perform_initialization=False,
+ )
+
+ layer = RowParallelLinear(
+ input_size=8,
+ output_size=8,
+ init_method=cfg.init_method,
+ bias=True,
+ input_is_parallel=True,
+ config=cfg,
+ skip_bias_add=False,
+ )
+ w = layer.weight
+ assert hasattr(w, "tensor_model_parallel") and w.tensor_model_parallel is True
+ assert hasattr(w, "partition_dim") and w.partition_dim == 1
+ assert hasattr(w, "partition_stride") and w.partition_stride == 1
diff --git a/tests/unit_tests/test_emerging_optimizers.py b/tests/unit_tests/test_emerging_optimizers.py
new file mode 100644
index 00000000000..53d780fd832
--- /dev/null
+++ b/tests/unit_tests/test_emerging_optimizers.py
@@ -0,0 +1,1574 @@
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+import os
+
+import pytest
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from packaging.version import Version
+
+from megatron.core import parallel_state
+from megatron.core.distributed import DistributedDataParallel, DistributedDataParallelConfig
+from megatron.core.optimizer import OptimizerConfig, get_megatron_optimizer
+from megatron.core.optimizer.emerging_optimizers import (
+ HAVE_EMERGING_OPTIMIZERS,
+ TensorParallelAdaptiveMuon,
+ TensorParallelMuon,
+)
+from megatron.core.process_groups_config import ProcessGroupCollection
+from megatron.core.transformer import TransformerConfig
+from tests.unit_tests.test_utilities import Utils
+
+if HAVE_EMERGING_OPTIMIZERS:
+    from emerging_optimizers.scalar_optimizers import Lion
+    from emerging_optimizers.soap import SOAP
+else:
+    # Optional dependency missing: stub the names so module import succeeds;
+    # SOAP/Lion tests are gated separately via skip markers.
+    SOAP = None
+    Lion = None
+
+# Skip all tests in this file for LTS versions
+# NOTE(review): default "24.01" means the whole file is skipped whenever
+# NVIDIA_PYTORCH_VERSION is unset (24.01 <= 25.05) — confirm this is the
+# intended behavior for non-NGC environments.
+pytestmark = pytest.mark.skipif(
+    Version(os.getenv('NVIDIA_PYTORCH_VERSION', "24.01")) <= Version("25.05"),
+    reason="Skip emerging optimizer tests for LTS test",
+)
+
+
+class Net(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.fc1 = nn.Linear(80, 48)
+ self.fc2 = nn.Linear(48, 32)
+ self.fc3 = nn.Linear(32, 24)
+ self.fc4 = nn.Linear(24, 16)
+ self.fc5 = nn.Linear(16, 10)
+
+ def forward(self, x):
+ x = F.relu(self.fc1(x))
+ x = F.relu(self.fc2(x))
+ x = F.relu(self.fc3(x))
+ x = F.relu(self.fc4(x))
+ x = self.fc5(x)
+ return x
+
+
+# ===========================================================================
+# Muon optimizer tests
+# ===========================================================================
+
+
+def test_muon_optimizer_smoke():
+    """Smoke test for TensorParallelMuon optimizer.
+
+    Covers construction, a single step, zero_grad, and state_dict round-trip.
+    NOTE(review): assumes a CUDA device is present (no skipif marker) —
+    consistent with the other single-rank tests in this file.
+    """
+    # Create a simple linear model for testing
+    model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda')
+    model.requires_grad_(True)
+    model.weight.data.fill_(1.0)
+
+    # Create TensorParallelMuon optimizer
+    optimizer = TensorParallelMuon(
+        params=[model.weight],
+        lr=0.01,
+        momentum=0.95,
+        nesterov=True,
+        weight_decay=0.01,
+        use_decoupled_weight_decay=True,
+        split_qkv=False,
+        fp32_matmul_prec="medium",
+        num_ns_steps=5,
+        scale_mode="spectral",
+        extra_scale_factor=1.0,
+        pg_collection=None,  # no process groups: single-rank usage
+        tp_mode="duplicated",
+    )
+
+    # Test basic properties
+    assert optimizer is not None, "Optimizer should not be None"
+    assert hasattr(optimizer, 'param_groups'), "Optimizer should have param_groups"
+    assert len(optimizer.param_groups) > 0, "Optimizer should have at least one parameter group"
+
+    # Test forward and backward pass
+    input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda')
+    output = model(input_tensor)
+    loss = output.sum()
+    loss.backward()
+
+    # Store original weight
+    original_weight = model.weight.data.clone()
+
+    # Test optimizer step
+    optimizer.step()
+
+    # Verify weight was updated
+    assert not torch.equal(
+        model.weight.data, original_weight
+    ), "Weight should be updated after optimizer step"
+
+    # Test zero_grad
+    optimizer.zero_grad()
+    assert model.weight.grad is None or torch.all(
+        model.weight.grad == 0
+    ), "Gradients should be zeroed"
+
+    # Test state_dict and load_state_dict
+    state_dict = optimizer.state_dict()
+    assert 'state' in state_dict, "State dict should contain state"
+    assert 'param_groups' in state_dict, "State dict should contain param_groups"
+
+    # Load state dict should not raise error
+    optimizer.load_state_dict(state_dict)
+
+
+@pytest.mark.skipif(
+ int(os.getenv('WORLD_SIZE', '1')) == 1, reason="Multi-rank test requires WORLD_SIZE > 1"
+)
+class TestMuonOptimizerMultiRank:
+ """Test class for Muon optimizer with multi-rank setup."""
+
+ @pytest.fixture(autouse=True)
+ def setup_and_teardown(self):
+ """Setup and teardown for each test."""
+ Utils.initialize_model_parallel()
+ yield
+ Utils.destroy_model_parallel()
+
+ def create_ddp_model(self, model):
+ """Wrap model in DDP.
+
+ Args:
+ model: Model to wrap
+
+ Returns:
+ DDP-wrapped model
+ """
+ ddp_config = DistributedDataParallelConfig(use_distributed_optimizer=False)
+ return DistributedDataParallel(
+ TransformerConfig(num_attention_heads=1, num_layers=1), ddp_config, model
+ )
+
+ def test_get_megatron_optimizer_smoke(self):
+ """Smoke test for get_megatron_optimizer function."""
+ model = Net().bfloat16().cuda()
+ model.requires_grad_(True)
+ model = self.create_ddp_model(model)
+
+ # Ensure all parameters require gradients
+ for param in model.parameters():
+ assert param.requires_grad, "All parameters should require gradients"
+
+ # Create optimizer config for Muon
+ optimizer_config = OptimizerConfig(
+ optimizer='muon', # This will be changed internally to 'adam' for non-linear params
+ lr=0.01,
+ weight_decay=0.01,
+ bf16=True,
+ use_distributed_optimizer=False, # Muon doesn't support distributed optimizer
+ muon_momentum=0.95,
+ muon_nesterov=True,
+ muon_fp32_matmul_prec="medium",
+ muon_num_ns_steps=5,
+ muon_scale_mode="spectral",
+ muon_tp_mode="duplicated",
+ )
+
+ # Test creating the optimizer
+ optimizer = get_megatron_optimizer(
+ config=optimizer_config, model_chunks=[model], use_gloo_process_groups=True
+ )
+
+ # Test basic properties
+ assert optimizer is not None, "Optimizer should not be None"
+ assert hasattr(optimizer, 'param_groups'), "Optimizer should have param_groups"
+ assert hasattr(optimizer, 'chained_optimizers'), "Should be a ChainedOptimizer"
+ assert len(optimizer.chained_optimizers) >= 1, "Should have at least one chained optimizer"
+
+ # Test forward and backward pass
+ input_tensor = torch.randn(16, 80, dtype=torch.bfloat16, device='cuda')
+ output = model(input_tensor)
+ loss = output.sum()
+ loss.backward()
+
+ # Store original parameters
+ original_params = {}
+ for name, param in model.named_parameters():
+ original_params[name] = param.data.clone()
+
+ # Test optimizer step
+ optimizer.step()
+
+ # Verify at least some parameters were updated
+ params_updated = 0
+ for name, param in model.named_parameters():
+ if not torch.equal(param.data, original_params[name]):
+ params_updated += 1
+
+ assert params_updated > 0, "At least some parameters should be updated after optimizer step"
+
+ # Test zero_grad
+ optimizer.zero_grad()
+ for param in model.parameters():
+ assert param.grad is None or torch.all(
+ param.grad == 0
+ ), f"Gradients should be zeroed for all parameters"
+
+ # Test state_dict and load_state_dict
+ state_dict = optimizer.state_dict()
+ assert isinstance(state_dict, list), "State dict should be a list"
+
+ # Load state dict should not raise error
+ optimizer.load_state_dict(state_dict)
+
+ def test_get_megatron_optimizer_validation(self):
+ """Test validation logic for get_megatron_optimizer."""
+ model = torch.nn.Linear(100, 50, bias=False, dtype=torch.bfloat16, device='cuda')
+ model.requires_grad_(True)
+ model = self.create_ddp_model(model)
+
+ # Test 1: FP16 should raise exception
+ optimizer_config_fp16 = OptimizerConfig(
+ optimizer='muon',
+ lr=0.01,
+ fp16=True, # This should cause an exception
+ use_distributed_optimizer=False,
+ )
+
+ with pytest.raises(Exception, match='emerging optimizer with fp16 is not supported'):
+ get_megatron_optimizer(config=optimizer_config_fp16, model_chunks=[model])
+
+ # Test 3: Invalid num_ns_steps should raise exception
+ optimizer_config_invalid_ns = OptimizerConfig(
+ optimizer='muon',
+ lr=0.01,
+ bf16=True,
+ use_distributed_optimizer=False,
+ muon_num_ns_steps=0, # This should cause an exception
+ )
+
+ with pytest.raises(ValueError, match='num_ns_steps must be at least 1'):
+ get_megatron_optimizer(config=optimizer_config_invalid_ns, model_chunks=[model])
+
+ def test_get_megatron_optimizer_layer_wise(self):
+ """Test get_megatron_optimizer with layer-wise distributed optimizer."""
+ model = Net().bfloat16().cuda()
+ model.requires_grad_(True)
+ model = self.create_ddp_model(model)
+
+ optimizer_config = OptimizerConfig(
+ optimizer='muon',
+ lr=0.01,
+ weight_decay=0.01,
+ bf16=True,
+ use_layer_wise_distributed_optimizer=True,
+ muon_momentum=0.95,
+ muon_nesterov=True,
+ muon_fp32_matmul_prec="medium",
+ muon_num_ns_steps=5,
+ muon_scale_mode="spectral",
+ muon_tp_mode="duplicated",
+ )
+
+ # use_layer_wise_distributed_optimizer=True triggers LayerWiseDistributedOptimizer
+ optimizer = get_megatron_optimizer(
+ config=optimizer_config, model_chunks=[model], use_gloo_process_groups=True
+ )
+
+ # Verify it's a LayerWiseDistributedOptimizer
+ from megatron.core.optimizer.layer_wise_optimizer import LayerWiseDistributedOptimizer
+
+ assert isinstance(
+ optimizer, LayerWiseDistributedOptimizer
+ ), "Should return LayerWiseDistributedOptimizer"
+
+ # Test forward and backward pass
+ input_tensor = torch.randn(16, 80, dtype=torch.bfloat16, device='cuda')
+ output = model(input_tensor)
+ loss = output.sum()
+ loss.backward()
+
+ # Test optimizer step
+ update_successful, grad_norm, num_zeros = optimizer.step()
+
+ assert update_successful, "Optimizer step should be successful"
+ assert grad_norm is not None or grad_norm is None, "Grad norm should be returned"
+
+
+@pytest.mark.parametrize("mode", ["duplicated", "blockwise", "distributed"])
+def test_muon_optimizer_different_modes_single_rank(mode):
+    """Test TensorParallelMuon optimizer with different modes on single rank.
+
+    When TP size is 1, all modes should produce the same result.
+    NOTE(review): each parametrized run is independent, so the "same result"
+    claim is not actually asserted here — only that each mode updates the
+    weight. Consider capturing and comparing updates across modes.
+    """
+    # Set random seed for reproducibility
+    torch.manual_seed(42)
+    torch.cuda.manual_seed(42)
+
+    model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda')
+    model.requires_grad_(True)
+    model.weight.data.normal_(0, 0.02)
+
+    optimizer = TensorParallelMuon(
+        params=[model.weight],
+        lr=0.01,
+        momentum=0.95,
+        weight_decay=0.0,  # Disable weight decay for deterministic comparison
+        num_ns_steps=5,
+        pg_collection=None,
+        tp_mode=mode,
+    )
+
+    # Use fixed input for deterministic results
+    torch.manual_seed(42)
+    input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda')
+
+    output = model(input_tensor)
+    loss = output.sum()
+    loss.backward()
+
+    original_weight = model.weight.data.clone()
+    optimizer.step()
+
+    # Verify weight was updated
+    assert not torch.equal(
+        model.weight.data, original_weight
+    ), f"Weight should be updated with mode={mode}"
+
+
+@pytest.mark.skipif(
+    int(os.getenv('WORLD_SIZE', '1')) == 1, reason="Multi-rank test requires WORLD_SIZE > 1"
+)
+class TestMuonOptimizerMultiRankTP:
+    """Test class for Muon optimizer with multi-rank and tensor parallel setup."""
+
+    @pytest.fixture(autouse=True)
+    def setup_and_teardown(self):
+        """Setup and teardown for each test with tensor parallel."""
+        world = int(os.getenv('WORLD_SIZE', '1'))
+        Utils.initialize_model_parallel(tensor_model_parallel_size=min(world, 2))
+        yield
+        Utils.destroy_model_parallel()
+
+    def create_tp_model_and_optimizer(self, mode):
+        """Create model with TP and optimizer.
+
+        Args:
+            mode: Muon optimizer mode
+
+        Returns:
+            tuple: (model, optimizer)
+        """
+        rank = int(os.getenv('RANK', '0'))
+        pg_collection = ProcessGroupCollection.use_mpu_process_groups()
+
+        # Create model with partition_dim for TP
+        # Per-rank seed: each rank holds a distinct shard-like weight.
+        torch.manual_seed(42 + rank)
+        model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda')
+        model.requires_grad_(True)
+        model.weight.data.normal_(0, 0.02)
+        model.weight.partition_dim = 0  # Set partition dimension for TP
+
+        optimizer = TensorParallelMuon(
+            params=[model.weight],
+            lr=0.01,
+            momentum=0.95,
+            weight_decay=0.0,
+            num_ns_steps=5,
+            pg_collection=pg_collection,
+            tp_mode=mode,
+        )
+
+        return model, optimizer
+
+    @pytest.mark.parametrize("mode", ["duplicated", "distributed"])
+    def test_muon_optimizer_modes_multirank_same_result(self, mode):
+        """Test that duplicated and distributed modes produce same results with TP > 1.
+
+        NOTE(review): no cross-mode comparison is performed — only that each
+        mode updates the weight. Consider asserting equality of the updates.
+        """
+        model, optimizer = self.create_tp_model_and_optimizer(mode)
+
+        # Use fixed input for deterministic results
+        torch.manual_seed(42)
+        input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda')
+
+        output = model(input_tensor)
+        loss = output.sum()
+        loss.backward()
+
+        original_weight = model.weight.data.clone()
+        optimizer.step()
+
+        # Verify weight was updated
+        assert not torch.equal(
+            model.weight.data, original_weight
+        ), f"Weight should be updated with mode={mode}"
+
+    def test_muon_optimizer_blockwise_mode_different_result(self):
+        """Test that blockwise mode produces different results than duplicated/distributed with TP > 1.
+
+        NOTE(review): as above, the "different result" claim is not asserted —
+        the test only checks that blockwise mode performs an update.
+        """
+        model, optimizer = self.create_tp_model_and_optimizer("blockwise")
+
+        # Use fixed input for deterministic results
+        torch.manual_seed(42)
+        input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda')
+
+        output = model(input_tensor)
+        loss = output.sum()
+        loss.backward()
+
+        original_weight = model.weight.data.clone()
+        optimizer.step()
+
+        # Verify weight was updated
+        assert not torch.equal(
+            model.weight.data, original_weight
+        ), "Weight should be updated with mode=blockwise"
+
+
+@pytest.mark.parametrize(
+ "coefficient_type_and_steps", [("simple", 3), ("quintic", 5), ("polar_express", 8)]
+)
+def test_muon_optimizer_coefficient_types(coefficient_type_and_steps):
+ """Test TensorParallelMuon optimizer with different coefficient types."""
+ model = torch.nn.Linear(80, 40, bias=False, dtype=torch.float32, device='cuda')
+ model.requires_grad_(True)
+ model.weight.data.fill_(1.0)
+
+ optimizer = TensorParallelMuon(
+ params=[model.weight],
+ lr=0.01,
+ coefficient_type=coefficient_type_and_steps[0],
+ num_ns_steps=coefficient_type_and_steps[1],
+ pg_collection=None,
+ tp_mode="duplicated",
+ )
+
+ input_tensor = torch.randn(16, 80, dtype=torch.float32, device='cuda')
+ output = model(input_tensor)
+ loss = output.sum()
+ loss.backward()
+
+ original_weight = model.weight.data.clone()
+ optimizer.step()
+
+ assert not torch.equal(
+ model.weight.data, original_weight
+ ), f"Weight should be updated with coefficient_type={coefficient_type_and_steps[0]} and num_ns_steps={coefficient_type_and_steps[1]}"
+
+
+@pytest.mark.parametrize("scale_mode", ["spectral", "unit_rms_norm", "shape_scaling"])
+def test_muon_optimizer_scale_modes(scale_mode):
+    """Test TensorParallelMuon optimizer with different scale modes.
+
+    Only checks that each scale mode produces a weight update.
+    """
+    model = torch.nn.Linear(60, 30, bias=False, dtype=torch.float32, device='cuda')
+    model.requires_grad_(True)
+    model.weight.data.fill_(1.0)
+
+    optimizer = TensorParallelMuon(
+        params=[model.weight],
+        lr=0.01,
+        scale_mode=scale_mode,
+        num_ns_steps=5,
+        pg_collection=None,
+        tp_mode="duplicated",
+    )
+
+    input_tensor = torch.randn(16, 60, dtype=torch.float32, device='cuda')
+    output = model(input_tensor)
+    loss = output.sum()
+    loss.backward()
+
+    original_weight = model.weight.data.clone()
+    optimizer.step()
+
+    assert not torch.equal(
+        model.weight.data, original_weight
+    ), f"Weight should be updated with scale_mode={scale_mode}"
+
+
+@pytest.mark.parametrize("nesterov", [True, False])
+def test_muon_optimizer_nesterov(nesterov):
+    """Test TensorParallelMuon optimizer with and without Nesterov momentum.
+
+    Only checks that a weight update occurs in both configurations.
+    """
+    model = torch.nn.Linear(50, 25, bias=False, dtype=torch.float32, device='cuda')
+    model.requires_grad_(True)
+    model.weight.data.fill_(1.0)
+
+    optimizer = TensorParallelMuon(
+        params=[model.weight],
+        lr=0.01,
+        momentum=0.9,
+        nesterov=nesterov,
+        num_ns_steps=5,
+        pg_collection=None,
+        tp_mode="duplicated",
+    )
+
+    input_tensor = torch.randn(16, 50, dtype=torch.float32, device='cuda')
+    output = model(input_tensor)
+    loss = output.sum()
+    loss.backward()
+
+    original_weight = model.weight.data.clone()
+    optimizer.step()
+
+    assert not torch.equal(
+        model.weight.data, original_weight
+    ), f"Weight should be updated with nesterov={nesterov}"
+
+
+def test_muon_optimizer_multiple_steps():
+ """Test TensorParallelMuon optimizer across multiple optimization steps."""
+ model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda')
+ model.requires_grad_(True)
+ model.weight.data.fill_(1.0)
+
+ optimizer = TensorParallelMuon(
+ params=[model.weight],
+ lr=0.01,
+ momentum=0.95,
+ weight_decay=0.01,
+ num_ns_steps=5,
+ pg_collection=None,
+ tp_mode="duplicated",
+ )
+
+ weights_history = [model.weight.data.clone()]
+
+ for i in range(3):
+ input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda')
+ output = model(input_tensor)
+ loss = output.sum()
+ loss.backward()
+
+ optimizer.step()
+ optimizer.zero_grad()
+ weights_history.append(model.weight.data.clone())
+
+ # Verify weights changed at each step
+ for i in range(len(weights_history) - 1):
+ assert not torch.equal(
+ weights_history[i], weights_history[i + 1]
+ ), f"Weight should change at step {i}"
+
+
+def test_muon_optimizer_qkv_split():
+ """Test TensorParallelMuon optimizer with QKV splitting."""
+ # Create a model with QKV-like parameter
+ qkv_size = 3 * 64 * 16 # Combined Q, K, V dimensions, 16 heads x 64 per head
+ hidden_size = 1024
+ model = torch.nn.Linear(hidden_size, qkv_size, bias=False, dtype=torch.float32, device='cuda')
+ model.requires_grad_(True)
+ model.weight.data.fill_(1.0)
+
+ # Mark parameter as QKV
+ model.weight.is_qkv = True
+
+ # QKV split shapes: [Q_size, K_size, V_size]
+ qkv_split_shapes = (64, 64, 64)
+
+ # Test with split_qkv=True
+ optimizer_split = TensorParallelMuon(
+ params=[model.weight],
+ lr=0.01,
+ split_qkv=True,
+ is_qkv_fn=lambda p: getattr(p, 'is_qkv', False),
+ qkv_split_shapes=qkv_split_shapes,
+ num_ns_steps=5,
+ pg_collection=None,
+ tp_mode="duplicated",
+ )
+
+ input_tensor = torch.randn(16, hidden_size, dtype=torch.float32, device='cuda')
+ output = model(input_tensor)
+ loss = output.sum()
+ loss.backward()
+
+ original_weight = model.weight.data.clone()
+ optimizer_split.step()
+ weight_with_split = model.weight.data.clone()
+
+ assert not torch.equal(
+ weight_with_split, original_weight
+ ), "QKV weight should be updated with split_qkv=True"
+
+ # Reset model and test with split_qkv=False
+ model.weight.data.fill_(1.0)
+ optimizer_no_split = TensorParallelMuon(
+ params=[model.weight],
+ lr=0.01,
+ split_qkv=False,
+ num_ns_steps=5,
+ pg_collection=None,
+ tp_mode="duplicated",
+ )
+
+ output = model(input_tensor)
+ loss = output.sum()
+ loss.backward()
+
+ optimizer_no_split.step()
+ weight_without_split = model.weight.data.clone()
+
+ assert not torch.equal(
+ weight_without_split, original_weight
+ ), "QKV weight should be updated with split_qkv=False"
+
+ # Ensure the two results are different
+ assert not torch.equal(
+ weight_with_split, weight_without_split
+ ), "Weights should be different between split_qkv=True and split_qkv=False"
+
+
+def test_muon_optimizer_extra_scale_factor():
+    """Test TensorParallelMuon optimizer with different extra_scale_factor values.
+
+    NOTE(review): only a single factor (2.0) is exercised despite the
+    docstring's plural — consider parametrizing.
+    """
+    model = torch.nn.Linear(80, 40, bias=False, dtype=torch.float32, device='cuda')
+    model.requires_grad_(True)
+    model.weight.data.fill_(1.0)
+
+    optimizer = TensorParallelMuon(
+        params=[model.weight],
+        lr=0.01,
+        extra_scale_factor=2.0,
+        num_ns_steps=5,
+        pg_collection=None,
+        tp_mode="duplicated",
+    )
+
+    input_tensor = torch.randn(16, 80, dtype=torch.float32, device='cuda')
+    output = model(input_tensor)
+    loss = output.sum()
+    loss.backward()
+
+    original_weight = model.weight.data.clone()
+    optimizer.step()
+
+    assert not torch.equal(
+        model.weight.data, original_weight
+    ), "Weight should be updated with extra_scale_factor"
+
+
+@pytest.mark.parametrize("num_ns_steps", [5, 15, 25])
+def test_muon_optimizer_num_ns_steps(num_ns_steps):
+    """Test TensorParallelMuon optimizer with different numbers of Newton-Schulz steps."""
+    model = torch.nn.Linear(60, 30, bias=False, dtype=torch.float32, device='cuda')
+    model.requires_grad_(True)
+    model.weight.data.fill_(1.0)
+
+    optimizer = TensorParallelMuon(
+        params=[model.weight],
+        lr=0.01,
+        coefficient_type="quintic",
+        num_ns_steps=num_ns_steps,
+        pg_collection=None,
+        tp_mode="duplicated",
+    )
+
+    input_tensor = torch.randn(16, 60, dtype=torch.float32, device='cuda')
+    output = model(input_tensor)
+    loss = output.sum()
+    loss.backward()
+
+    original_weight = model.weight.data.clone()
+    optimizer.step()
+
+    assert not torch.equal(
+        model.weight.data, original_weight
+    ), f"Weight should be updated with num_ns_steps={num_ns_steps}"
+
+
+# ===========================================================================
+# Adaptive Muon optimizer tests
+# ===========================================================================
+
+
+def test_adaptive_muon_optimizer_smoke():
+    """Smoke test for TensorParallelAdaptiveMuon optimizer.
+
+    Mirrors test_muon_optimizer_smoke, adding the adaptive-specific
+    arguments (moment2_method, beta2, eps).
+    """
+    model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda')
+    model.requires_grad_(True)
+    model.weight.data.fill_(1.0)
+
+    optimizer = TensorParallelAdaptiveMuon(
+        params=[model.weight],
+        lr=0.01,
+        momentum=0.95,
+        nesterov=True,
+        weight_decay=0.01,
+        use_decoupled_weight_decay=True,
+        split_qkv=False,
+        fp32_matmul_prec="medium",
+        num_ns_steps=5,
+        scale_mode="spectral",
+        extra_scale_factor=1.0,
+        pg_collection=None,
+        tp_mode="duplicated",
+        moment2_method="adamuon",
+        beta2=0.95,
+        eps=1e-8,
+    )
+
+    assert optimizer is not None
+    assert hasattr(optimizer, 'param_groups')
+    assert len(optimizer.param_groups) > 0
+
+    input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda')
+    output = model(input_tensor)
+    loss = output.sum()
+    loss.backward()
+
+    original_weight = model.weight.data.clone()
+    optimizer.step()
+
+    assert not torch.equal(
+        model.weight.data, original_weight
+    ), "Weight should be updated after optimizer step"
+
+    optimizer.zero_grad()
+    assert model.weight.grad is None or torch.all(
+        model.weight.grad == 0
+    ), "Gradients should be zeroed"
+
+    # state_dict round-trip should not raise
+    state_dict = optimizer.state_dict()
+    assert 'state' in state_dict
+    assert 'param_groups' in state_dict
+    optimizer.load_state_dict(state_dict)
+
+
+@pytest.mark.parametrize("mode", ["duplicated", "blockwise", "distributed"])
+def test_adaptive_muon_optimizer_different_modes_single_rank(mode):
+    """Test TensorParallelAdaptiveMuon with different modes on single rank.
+
+    Only asserts that each mode produces a weight update.
+    """
+    torch.manual_seed(42)
+    torch.cuda.manual_seed(42)
+
+    model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda')
+    model.requires_grad_(True)
+    model.weight.data.normal_(0, 0.02)
+
+    optimizer = TensorParallelAdaptiveMuon(
+        params=[model.weight],
+        lr=0.01,
+        momentum=0.95,
+        weight_decay=0.0,
+        num_ns_steps=5,
+        pg_collection=None,
+        tp_mode=mode,
+    )
+
+    torch.manual_seed(42)
+    input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda')
+
+    output = model(input_tensor)
+    loss = output.sum()
+    loss.backward()
+
+    original_weight = model.weight.data.clone()
+    optimizer.step()
+
+    assert not torch.equal(
+        model.weight.data, original_weight
+    ), f"Weight should be updated with mode={mode}"
+
+
+@pytest.mark.parametrize("moment2_method", ["adamuon", "normuon"])
+def test_adaptive_muon_optimizer_moment2_methods(moment2_method):
+    """Test TensorParallelAdaptiveMuon with different moment2 methods."""
+    model = torch.nn.Linear(80, 40, bias=False, dtype=torch.float32, device='cuda')
+    model.requires_grad_(True)
+    model.weight.data.fill_(1.0)
+
+    optimizer = TensorParallelAdaptiveMuon(
+        params=[model.weight],
+        lr=0.01,
+        num_ns_steps=5,
+        pg_collection=None,
+        tp_mode="duplicated",
+        moment2_method=moment2_method,
+    )
+
+    input_tensor = torch.randn(16, 80, dtype=torch.float32, device='cuda')
+    output = model(input_tensor)
+    loss = output.sum()
+    loss.backward()
+
+    original_weight = model.weight.data.clone()
+    optimizer.step()
+
+    assert not torch.equal(
+        model.weight.data, original_weight
+    ), f"Weight should be updated with moment2_method={moment2_method}"
+
+
+@pytest.mark.parametrize("beta2", [0.5, 0.95, 0.999])
+def test_adaptive_muon_optimizer_beta2(beta2):
+    """Test TensorParallelAdaptiveMuon with different beta2 values."""
+    model = torch.nn.Linear(60, 30, bias=False, dtype=torch.float32, device='cuda')
+    model.requires_grad_(True)
+    model.weight.data.fill_(1.0)
+
+    optimizer = TensorParallelAdaptiveMuon(
+        params=[model.weight],
+        lr=0.01,
+        num_ns_steps=5,
+        pg_collection=None,
+        tp_mode="duplicated",
+        beta2=beta2,
+    )
+
+    input_tensor = torch.randn(16, 60, dtype=torch.float32, device='cuda')
+    output = model(input_tensor)
+    loss = output.sum()
+    loss.backward()
+
+    original_weight = model.weight.data.clone()
+    optimizer.step()
+
+    assert not torch.equal(
+        model.weight.data, original_weight
+    ), f"Weight should be updated with beta2={beta2}"
+
+
+def test_adaptive_muon_optimizer_multiple_steps():
+    """Test TensorParallelAdaptiveMuon across multiple optimization steps.
+
+    Verifies the weight changes on every one of three consecutive steps.
+    """
+    model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda')
+    model.requires_grad_(True)
+    model.weight.data.fill_(1.0)
+
+    optimizer = TensorParallelAdaptiveMuon(
+        params=[model.weight],
+        lr=0.01,
+        momentum=0.95,
+        weight_decay=0.01,
+        num_ns_steps=5,
+        pg_collection=None,
+        tp_mode="duplicated",
+    )
+
+    weights_history = [model.weight.data.clone()]
+
+    for i in range(3):
+        input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda')
+        output = model(input_tensor)
+        loss = output.sum()
+        loss.backward()
+
+        optimizer.step()
+        optimizer.zero_grad()
+        weights_history.append(model.weight.data.clone())
+
+    for i in range(len(weights_history) - 1):
+        assert not torch.equal(
+            weights_history[i], weights_history[i + 1]
+        ), f"Weight should change at step {i}"
+
+
+@pytest.mark.parametrize("nesterov", [True, False])
+def test_adaptive_muon_optimizer_nesterov(nesterov):
+    """Test TensorParallelAdaptiveMuon with and without Nesterov momentum."""
+    model = torch.nn.Linear(50, 25, bias=False, dtype=torch.float32, device='cuda')
+    model.requires_grad_(True)
+    model.weight.data.fill_(1.0)
+
+    optimizer = TensorParallelAdaptiveMuon(
+        params=[model.weight],
+        lr=0.01,
+        momentum=0.9,
+        nesterov=nesterov,
+        num_ns_steps=5,
+        pg_collection=None,
+        tp_mode="duplicated",
+    )
+
+    input_tensor = torch.randn(16, 50, dtype=torch.float32, device='cuda')
+    output = model(input_tensor)
+    loss = output.sum()
+    loss.backward()
+
+    original_weight = model.weight.data.clone()
+    optimizer.step()
+
+    assert not torch.equal(
+        model.weight.data, original_weight
+    ), f"Weight should be updated with nesterov={nesterov}"
+
+
+def test_adaptive_muon_optimizer_qkv_split():
+ """Test TensorParallelAdaptiveMuon with QKV splitting."""
+ qkv_size = 3 * 64 * 16 # Combined Q, K, V dimensions
+ hidden_size = 1024
+ model = torch.nn.Linear(hidden_size, qkv_size, bias=False, dtype=torch.float32, device='cuda')
+ model.requires_grad_(True)
+ model.weight.data.fill_(1.0)
+
+ model.weight.is_qkv = True
+ qkv_split_shapes = (64, 64, 64)
+
+ optimizer_split = TensorParallelAdaptiveMuon(
+ params=[model.weight],
+ lr=0.01,
+ split_qkv=True,
+ is_qkv_fn=lambda p: getattr(p, 'is_qkv', False),
+ qkv_split_shapes=qkv_split_shapes,
+ num_ns_steps=5,
+ pg_collection=None,
+ tp_mode="duplicated",
+ )
+
+ input_tensor = torch.randn(16, hidden_size, dtype=torch.float32, device='cuda')
+ output = model(input_tensor)
+ loss = output.sum()
+ loss.backward()
+
+ original_weight = model.weight.data.clone()
+ optimizer_split.step()
+ weight_with_split = model.weight.data.clone()
+
+ assert not torch.equal(
+ weight_with_split, original_weight
+ ), "QKV weight should be updated with split_qkv=True"
+
+ model.weight.data.fill_(1.0)
+ optimizer_no_split = TensorParallelAdaptiveMuon(
+ params=[model.weight],
+ lr=0.01,
+ split_qkv=False,
+ num_ns_steps=5,
+ pg_collection=None,
+ tp_mode="duplicated",
+ )
+
+ output = model(input_tensor)
+ loss = output.sum()
+ loss.backward()
+
+ optimizer_no_split.step()
+ weight_without_split = model.weight.data.clone()
+
+ assert not torch.equal(
+ weight_without_split, original_weight
+ ), "QKV weight should be updated with split_qkv=False"
+
+ assert not torch.equal(
+ weight_with_split, weight_without_split
+ ), "Weights should be different between split_qkv=True and split_qkv=False"
+
+
+@pytest.mark.skipif(
+    int(os.getenv('WORLD_SIZE', '1')) == 1, reason="Multi-rank test requires WORLD_SIZE > 1"
+)
+class TestAdaptiveMuonOptimizerMultiRank:
+    """Test class for Adaptive Muon optimizer with multi-rank setup.
+
+    Mirrors TestMuonOptimizerMultiRank with optimizer='adaptive_muon' and the
+    adaptive-specific config fields.
+    """
+
+    @pytest.fixture(autouse=True)
+    def setup_and_teardown(self):
+        """Setup and teardown for each test."""
+        Utils.initialize_model_parallel()
+        yield
+        Utils.destroy_model_parallel()
+
+    def create_ddp_model(self, model):
+        """Wrap model in DDP."""
+        ddp_config = DistributedDataParallelConfig(use_distributed_optimizer=False)
+        return DistributedDataParallel(
+            TransformerConfig(num_attention_heads=1, num_layers=1), ddp_config, model
+        )
+
+    def test_get_megatron_optimizer_adaptive_muon_smoke(self):
+        """Smoke test for get_megatron_optimizer with adaptive_muon."""
+        model = Net().bfloat16().cuda()
+        model.requires_grad_(True)
+        model = self.create_ddp_model(model)
+
+        for param in model.parameters():
+            assert param.requires_grad
+
+        optimizer_config = OptimizerConfig(
+            optimizer='adaptive_muon',
+            lr=0.01,
+            weight_decay=0.01,
+            bf16=True,
+            use_distributed_optimizer=False,
+            muon_momentum=0.95,
+            muon_nesterov=True,
+            muon_fp32_matmul_prec="medium",
+            muon_num_ns_steps=5,
+            muon_scale_mode="spectral",
+            muon_tp_mode="duplicated",
+            adaptive_muon_moment2_method="adamuon",
+            adaptive_muon_beta2=0.95,
+            adaptive_muon_eps=1e-8,
+        )
+
+        optimizer = get_megatron_optimizer(
+            config=optimizer_config, model_chunks=[model], use_gloo_process_groups=True
+        )
+
+        assert optimizer is not None
+        assert hasattr(optimizer, 'param_groups')
+        assert hasattr(optimizer, 'chained_optimizers')
+        assert len(optimizer.chained_optimizers) >= 1
+
+        input_tensor = torch.randn(16, 80, dtype=torch.bfloat16, device='cuda')
+        output = model(input_tensor)
+        loss = output.sum()
+        loss.backward()
+
+        original_params = {}
+        for name, param in model.named_parameters():
+            original_params[name] = param.data.clone()
+
+        optimizer.step()
+
+        # Count how many parameters moved during the step.
+        params_updated = 0
+        for name, param in model.named_parameters():
+            if not torch.equal(param.data, original_params[name]):
+                params_updated += 1
+
+        assert params_updated > 0, "At least some parameters should be updated after optimizer step"
+
+        optimizer.zero_grad()
+        for param in model.parameters():
+            assert param.grad is None or torch.all(
+                param.grad == 0
+            ), "Gradients should be zeroed for all parameters"
+
+        # state_dict round-trip should not raise
+        state_dict = optimizer.state_dict()
+        assert isinstance(state_dict, list)
+        optimizer.load_state_dict(state_dict)
+
+    def test_get_megatron_optimizer_adaptive_muon_validation(self):
+        """Test validation logic for get_megatron_optimizer with adaptive_muon."""
+        model = torch.nn.Linear(100, 50, bias=False, dtype=torch.bfloat16, device='cuda')
+        model.requires_grad_(True)
+        model = self.create_ddp_model(model)
+
+        # FP16 should raise: emerging optimizers only support bf16/fp32.
+        optimizer_config_fp16 = OptimizerConfig(
+            optimizer='adaptive_muon', lr=0.01, fp16=True, use_distributed_optimizer=False
+        )
+
+        with pytest.raises(Exception, match='emerging optimizer with fp16 is not supported'):
+            get_megatron_optimizer(config=optimizer_config_fp16, model_chunks=[model])
+
+
+@pytest.mark.skipif(
+    int(os.getenv('WORLD_SIZE', '1')) == 1, reason="Multi-rank test requires WORLD_SIZE > 1"
+)
+class TestAdaptiveMuonOptimizerMultiRankTP:
+    """Test class for Adaptive Muon optimizer with multi-rank and tensor parallel setup."""
+
+    @pytest.fixture(autouse=True)
+    def setup_and_teardown(self):
+        """Setup and teardown for each test with tensor parallel."""
+        world = int(os.getenv('WORLD_SIZE', '1'))
+        Utils.initialize_model_parallel(tensor_model_parallel_size=min(world, 2))
+        yield
+        Utils.destroy_model_parallel()
+
+    def create_tp_model_and_optimizer(self, mode):
+        """Create model with TP and optimizer.
+
+        Args:
+            mode: Adaptive Muon optimizer TP mode
+
+        Returns:
+            tuple: (model, optimizer)
+        """
+        rank = int(os.getenv('RANK', '0'))
+        pg_collection = ProcessGroupCollection.use_mpu_process_groups()
+
+        # Per-rank seed: each rank holds a distinct shard-like weight.
+        torch.manual_seed(42 + rank)
+        model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda')
+        model.requires_grad_(True)
+        model.weight.data.normal_(0, 0.02)
+        model.weight.partition_dim = 0
+
+        optimizer = TensorParallelAdaptiveMuon(
+            params=[model.weight],
+            lr=0.01,
+            momentum=0.95,
+            weight_decay=0.0,
+            num_ns_steps=5,
+            pg_collection=pg_collection,
+            tp_mode=mode,
+        )
+
+        return model, optimizer
+
+    @pytest.mark.parametrize("mode", ["duplicated", "distributed"])
+    def test_adaptive_muon_optimizer_modes_multirank_same_result(self, mode):
+        """Test that duplicated and distributed modes produce same results with TP > 1.
+
+        NOTE(review): no cross-mode comparison is performed — only that each
+        mode updates the weight. Consider asserting equality of the updates.
+        """
+        model, optimizer = self.create_tp_model_and_optimizer(mode)
+
+        torch.manual_seed(42)
+        input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda')
+
+        output = model(input_tensor)
+        loss = output.sum()
+        loss.backward()
+
+        original_weight = model.weight.data.clone()
+        optimizer.step()
+
+        assert not torch.equal(
+            model.weight.data, original_weight
+        ), f"Weight should be updated with mode={mode}"
+
+    def test_adaptive_muon_optimizer_blockwise_mode(self):
+        """Test that blockwise mode works with TP > 1."""
+        model, optimizer = self.create_tp_model_and_optimizer("blockwise")
+
+        torch.manual_seed(42)
+        input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda')
+
+        output = model(input_tensor)
+        loss = output.sum()
+        loss.backward()
+
+        original_weight = model.weight.data.clone()
+        optimizer.step()
+
+        assert not torch.equal(
+            model.weight.data, original_weight
+        ), "Weight should be updated with mode=blockwise"
+
+
+# ===========================================================================
+# SOAP optimizer tests
+# ===========================================================================
+
+skip_no_soap = pytest.mark.skipif(
+ not HAVE_EMERGING_OPTIMIZERS, reason="emerging_optimizers package not installed"
+)
+
+
+@skip_no_soap
+def test_soap_optimizer_smoke():
+ """Smoke test for SOAP optimizer."""
+
+ model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda')
+ model.requires_grad_(True)
+ model.weight.data.fill_(1.0)
+
+ optimizer = SOAP(
+ params=[model.weight],
+ lr=0.01,
+ betas=(0.9, 0.999),
+ shampoo_beta=0.95,
+ weight_decay=0.01,
+ precondition_frequency=1,
+ )
+
+ # Test basic properties
+ assert optimizer is not None, "Optimizer should not be None"
+ assert hasattr(optimizer, 'param_groups'), "Optimizer should have param_groups"
+ assert len(optimizer.param_groups) > 0, "Optimizer should have at least one parameter group"
+
+ # Test forward and backward pass
+ input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda')
+ output = model(input_tensor)
+ loss = output.sum()
+ loss.backward()
+
+ # Store original weight
+ original_weight = model.weight.data.clone()
+
+ # Test optimizer step
+ optimizer.step()
+
+ # Verify weight was updated
+ assert not torch.equal(
+ model.weight.data, original_weight
+ ), "Weight should be updated after optimizer step"
+
+ # Test zero_grad
+ optimizer.zero_grad()
+ assert model.weight.grad is None or torch.all(
+ model.weight.grad == 0
+ ), "Gradients should be zeroed"
+
+ # Test state_dict and load_state_dict
+ state_dict = optimizer.state_dict()
+ assert 'state' in state_dict, "State dict should contain state"
+ assert 'param_groups' in state_dict, "State dict should contain param_groups"
+
+ # Load state dict should not raise error
+ optimizer.load_state_dict(state_dict)
+
+
+@skip_no_soap
+def test_soap_optimizer_multiple_steps():
+ """Test SOAP optimizer across multiple optimization steps."""
+ model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda')
+ model.requires_grad_(True)
+ model.weight.data.fill_(1.0)
+
+ optimizer = SOAP(
+ params=[model.weight],
+ lr=0.01,
+ betas=(0.9, 0.999),
+ shampoo_beta=0.95,
+ weight_decay=0.01,
+ precondition_frequency=1,
+ )
+
+ weights_history = [model.weight.data.clone()]
+
+ for i in range(3):
+ input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda')
+ output = model(input_tensor)
+ loss = output.sum()
+ loss.backward()
+
+ optimizer.step()
+ optimizer.zero_grad()
+ weights_history.append(model.weight.data.clone())
+
+ # Verify weights changed at each step
+ for i in range(len(weights_history) - 1):
+ assert not torch.equal(
+ weights_history[i], weights_history[i + 1]
+ ), f"Weight should change at step {i}"
+
+
+@skip_no_soap
+@pytest.mark.parametrize("precondition_frequency", [1, 5, 10])
+def test_soap_optimizer_precondition_frequency(precondition_frequency):
+ """Test SOAP optimizer with different precondition frequencies."""
+
+ model = torch.nn.Linear(60, 30, bias=False, dtype=torch.float32, device='cuda')
+ model.requires_grad_(True)
+ model.weight.data.fill_(1.0)
+
+ optimizer = SOAP(
+ params=[model.weight],
+ lr=0.01,
+ betas=(0.9, 0.999),
+ shampoo_beta=0.95,
+ precondition_frequency=precondition_frequency,
+ )
+
+ input_tensor = torch.randn(16, 60, dtype=torch.float32, device='cuda')
+ output = model(input_tensor)
+ loss = output.sum()
+ loss.backward()
+
+ original_weight = model.weight.data.clone()
+ optimizer.step()
+
+ assert not torch.equal(
+ model.weight.data, original_weight
+ ), f"Weight should be updated with precondition_frequency={precondition_frequency}"
+
+
+@skip_no_soap
+@pytest.mark.parametrize("use_kl_shampoo", [True, False])
+def test_soap_optimizer_kl_shampoo(use_kl_shampoo):
+ """Test SOAP optimizer with and without KL-Shampoo preconditioner."""
+
+ model = torch.nn.Linear(60, 30, bias=False, dtype=torch.float32, device='cuda')
+ model.requires_grad_(True)
+ model.weight.data.fill_(1.0)
+
+ optimizer = SOAP(
+ params=[model.weight],
+ lr=0.01,
+ betas=(0.9, 0.999),
+ shampoo_beta=0.95,
+ use_kl_shampoo=use_kl_shampoo,
+ precondition_frequency=1,
+ )
+
+ input_tensor = torch.randn(16, 60, dtype=torch.float32, device='cuda')
+ output = model(input_tensor)
+ loss = output.sum()
+ loss.backward()
+
+ original_weight = model.weight.data.clone()
+ optimizer.step()
+
+ assert not torch.equal(
+ model.weight.data, original_weight
+ ), f"Weight should be updated with use_kl_shampoo={use_kl_shampoo}"
+
+
+@skip_no_soap
+@pytest.mark.parametrize("shampoo_beta", [0.5, 0.9, 0.99])
+def test_soap_optimizer_shampoo_beta(shampoo_beta):
+ """Test SOAP optimizer with different shampoo_beta values."""
+
+ model = torch.nn.Linear(60, 30, bias=False, dtype=torch.float32, device='cuda')
+ model.requires_grad_(True)
+ model.weight.data.fill_(1.0)
+
+ optimizer = SOAP(
+ params=[model.weight],
+ lr=0.01,
+ betas=(0.9, 0.999),
+ shampoo_beta=shampoo_beta,
+ precondition_frequency=1,
+ )
+
+ input_tensor = torch.randn(16, 60, dtype=torch.float32, device='cuda')
+ output = model(input_tensor)
+ loss = output.sum()
+ loss.backward()
+
+ original_weight = model.weight.data.clone()
+ optimizer.step()
+
+ assert not torch.equal(
+ model.weight.data, original_weight
+ ), f"Weight should be updated with shampoo_beta={shampoo_beta}"
+
+
+@pytest.mark.skipif(
+ int(os.getenv('WORLD_SIZE', '1')) == 1, reason="Multi-rank test requires WORLD_SIZE > 1"
+)
+class TestSoapOptimizerMultiRank:
+ """Test class for SOAP optimizer with multi-rank setup."""
+
+ @pytest.fixture(autouse=True)
+ def setup_and_teardown(self):
+ """Setup and teardown for each test."""
+ Utils.initialize_model_parallel()
+ yield
+ Utils.destroy_model_parallel()
+
+ def create_ddp_model(self, model):
+ """Wrap model in DDP."""
+ ddp_config = DistributedDataParallelConfig(use_distributed_optimizer=False)
+ return DistributedDataParallel(
+ TransformerConfig(num_attention_heads=1, num_layers=1), ddp_config, model
+ )
+
+ def test_get_megatron_optimizer_soap_smoke(self):
+ """Smoke test for get_megatron_optimizer with SOAP."""
+ model = Net().bfloat16().cuda()
+ model.requires_grad_(True)
+ model = self.create_ddp_model(model)
+
+ for param in model.parameters():
+ assert param.requires_grad, "All parameters should require gradients"
+
+ optimizer_config = OptimizerConfig(
+ optimizer='soap',
+ lr=0.01,
+ weight_decay=0.01,
+ bf16=True,
+ use_distributed_optimizer=False,
+ soap_shampoo_beta=0.95,
+ soap_precondition_frequency=1,
+ soap_use_kl_shampoo=True,
+ )
+
+ optimizer = get_megatron_optimizer(
+ config=optimizer_config, model_chunks=[model], use_gloo_process_groups=True
+ )
+
+ assert optimizer is not None, "Optimizer should not be None"
+ assert hasattr(optimizer, 'param_groups'), "Optimizer should have param_groups"
+ assert hasattr(optimizer, 'chained_optimizers'), "Should be a ChainedOptimizer"
+ assert len(optimizer.chained_optimizers) >= 1, "Should have at least one chained optimizer"
+
+ # Test forward and backward pass
+ input_tensor = torch.randn(16, 80, dtype=torch.bfloat16, device='cuda')
+ output = model(input_tensor)
+ loss = output.sum()
+ loss.backward()
+
+ # Store original parameters
+ original_params = {}
+ for name, param in model.named_parameters():
+ original_params[name] = param.data.clone()
+
+ # Test optimizer step
+ optimizer.step()
+
+ # Verify at least some parameters were updated
+ params_updated = 0
+ for name, param in model.named_parameters():
+ if not torch.equal(param.data, original_params[name]):
+ params_updated += 1
+
+ assert params_updated > 0, "At least some parameters should be updated after optimizer step"
+
+ # Test zero_grad
+ optimizer.zero_grad()
+ for param in model.parameters():
+ assert param.grad is None or torch.all(
+ param.grad == 0
+ ), "Gradients should be zeroed for all parameters"
+
+ # Test state_dict and load_state_dict
+ state_dict = optimizer.state_dict()
+ assert isinstance(state_dict, list), "State dict should be a list"
+ optimizer.load_state_dict(state_dict)
+
+ def test_get_megatron_optimizer_soap_validation(self):
+ """Test validation logic for get_megatron_optimizer with SOAP."""
+ model = torch.nn.Linear(100, 50, bias=False, dtype=torch.bfloat16, device='cuda')
+ model.requires_grad_(True)
+ model = self.create_ddp_model(model)
+
+ # FP16 should raise exception
+ optimizer_config_fp16 = OptimizerConfig(
+ optimizer='soap', lr=0.01, fp16=True, use_distributed_optimizer=False
+ )
+
+ with pytest.raises(Exception, match='emerging optimizer with fp16 is not supported'):
+ get_megatron_optimizer(config=optimizer_config_fp16, model_chunks=[model])
+
+
+# ===========================================================================
+# Lion optimizer tests
+# ===========================================================================
+
+skip_no_lion = pytest.mark.skipif(
+ not HAVE_EMERGING_OPTIMIZERS, reason="emerging_optimizers package not installed"
+)
+
+
+@skip_no_lion
+def test_lion_optimizer_smoke():
+ """Smoke test for Lion optimizer."""
+ model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda')
+ model.requires_grad_(True)
+ model.weight.data.fill_(1.0)
+
+ optimizer = Lion(params=[model.weight], lr=1e-4, betas=(0.9, 0.99), weight_decay=0.01)
+
+ assert optimizer is not None
+ assert hasattr(optimizer, 'param_groups')
+ assert len(optimizer.param_groups) > 0
+
+ input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda')
+ output = model(input_tensor)
+ loss = output.sum()
+ loss.backward()
+
+ original_weight = model.weight.data.clone()
+ optimizer.step()
+
+ assert not torch.equal(
+ model.weight.data, original_weight
+ ), "Weight should be updated after optimizer step"
+
+ optimizer.zero_grad()
+ assert model.weight.grad is None or torch.all(
+ model.weight.grad == 0
+ ), "Gradients should be zeroed"
+
+ state_dict = optimizer.state_dict()
+ assert 'state' in state_dict
+ assert 'param_groups' in state_dict
+ optimizer.load_state_dict(state_dict)
+
+
+@skip_no_lion
+def test_lion_optimizer_multiple_steps():
+ """Test Lion optimizer across multiple optimization steps."""
+ model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda')
+ model.requires_grad_(True)
+ model.weight.data.fill_(1.0)
+
+ optimizer = Lion(params=[model.weight], lr=1e-4, betas=(0.9, 0.99), weight_decay=0.01)
+
+ weights_history = [model.weight.data.clone()]
+
+ for i in range(3):
+ input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda')
+ output = model(input_tensor)
+ loss = output.sum()
+ loss.backward()
+
+ optimizer.step()
+ optimizer.zero_grad()
+ weights_history.append(model.weight.data.clone())
+
+ for i in range(len(weights_history) - 1):
+ assert not torch.equal(
+ weights_history[i], weights_history[i + 1]
+ ), f"Weight should change at step {i}"
+
+
+@skip_no_lion
+@pytest.mark.parametrize("betas", [(0.9, 0.99), (0.95, 0.999), (0.5, 0.9)])
+def test_lion_optimizer_betas(betas):
+ """Test Lion optimizer with different beta values."""
+ model = torch.nn.Linear(80, 40, bias=False, dtype=torch.float32, device='cuda')
+ model.requires_grad_(True)
+ model.weight.data.fill_(1.0)
+
+ optimizer = Lion(params=[model.weight], lr=1e-4, betas=betas)
+
+ input_tensor = torch.randn(16, 80, dtype=torch.float32, device='cuda')
+ output = model(input_tensor)
+ loss = output.sum()
+ loss.backward()
+
+ original_weight = model.weight.data.clone()
+ optimizer.step()
+
+ assert not torch.equal(
+ model.weight.data, original_weight
+ ), f"Weight should be updated with betas={betas}"
+
+
+@skip_no_lion
+@pytest.mark.parametrize("weight_decay", [0.0, 0.01, 0.1])
+def test_lion_optimizer_weight_decay(weight_decay):
+ """Test Lion optimizer with different weight decay values."""
+ model = torch.nn.Linear(60, 30, bias=False, dtype=torch.float32, device='cuda')
+ model.requires_grad_(True)
+ model.weight.data.fill_(1.0)
+
+ optimizer = Lion(params=[model.weight], lr=1e-4, betas=(0.9, 0.99), weight_decay=weight_decay)
+
+ input_tensor = torch.randn(16, 60, dtype=torch.float32, device='cuda')
+ output = model(input_tensor)
+ loss = output.sum()
+ loss.backward()
+
+ original_weight = model.weight.data.clone()
+ optimizer.step()
+
+ assert not torch.equal(
+ model.weight.data, original_weight
+ ), f"Weight should be updated with weight_decay={weight_decay}"
+
+
+@skip_no_lion
+@pytest.mark.parametrize("weight_decay_method", ["decoupled", "l2"])
+def test_lion_optimizer_weight_decay_method(weight_decay_method):
+ """Test Lion optimizer with different weight decay methods."""
+ model = torch.nn.Linear(60, 30, bias=False, dtype=torch.float32, device='cuda')
+ model.requires_grad_(True)
+ model.weight.data.fill_(1.0)
+
+ optimizer = Lion(
+ params=[model.weight],
+ lr=1e-4,
+ betas=(0.9, 0.99),
+ weight_decay=0.01,
+ weight_decay_method=weight_decay_method,
+ )
+
+ input_tensor = torch.randn(16, 60, dtype=torch.float32, device='cuda')
+ output = model(input_tensor)
+ loss = output.sum()
+ loss.backward()
+
+ original_weight = model.weight.data.clone()
+ optimizer.step()
+
+ assert not torch.equal(
+ model.weight.data, original_weight
+ ), f"Weight should be updated with weight_decay_method={weight_decay_method}"
+
+
+@skip_no_lion
+def test_lion_optimizer_multi_layer_net():
+ """Test Lion optimizer with the multi-layer Net model."""
+ model = Net().cuda()
+ model.requires_grad_(True)
+
+ optimizer = Lion(params=model.parameters(), lr=1e-4, betas=(0.9, 0.99), weight_decay=0.01)
+
+ input_tensor = torch.randn(16, 80, dtype=torch.float32, device='cuda')
+ output = model(input_tensor)
+ loss = output.sum()
+ loss.backward()
+
+ original_params = {name: p.data.clone() for name, p in model.named_parameters()}
+ optimizer.step()
+
+ params_updated = 0
+ for name, param in model.named_parameters():
+ if not torch.equal(param.data, original_params[name]):
+ params_updated += 1
+
+ assert params_updated > 0, "At least some parameters should be updated after optimizer step"
diff --git a/tests/unit_tests/test_fp8_param.py b/tests/unit_tests/test_fp8_param.py
index 34b504e21de..e0a71526297 100644
--- a/tests/unit_tests/test_fp8_param.py
+++ b/tests/unit_tests/test_fp8_param.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
import contextlib
import gc
@@ -72,12 +72,12 @@ def setup_method(self, method):
os.environ['CUDA_DEVICE_MAX_CONNECTIONS'] = '1'
def teardown_method(self, method):
- Utils.destroy_model_parallel()
- destroy_global_vars()
- destroy_num_microbatches_calculator()
if self.cuda_graph_helper is not None and self.cuda_graph_helper.graphs_created():
self.cuda_graph_helper.delete_cuda_graphs()
self.cuda_graph_helper = None
+ Utils.destroy_model_parallel()
+ destroy_global_vars()
+ destroy_num_microbatches_calculator()
gc.collect()
def model_provider(
diff --git a/tests/unit_tests/test_inference.py b/tests/unit_tests/test_inference.py
index 8b3a4a64da4..9474ac0475a 100644
--- a/tests/unit_tests/test_inference.py
+++ b/tests/unit_tests/test_inference.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
import argparse
import unittest.mock
diff --git a/tests/unit_tests/test_layer_wise_optimizer.py b/tests/unit_tests/test_layer_wise_optimizer.py
index c484ca104ee..d8b0e97b524 100644
--- a/tests/unit_tests/test_layer_wise_optimizer.py
+++ b/tests/unit_tests/test_layer_wise_optimizer.py
@@ -417,7 +417,7 @@ def test_bf16_error(self):
optimizer='muon', lr=0.01, bf16=True, use_distributed_optimizer=False
)
with pytest.raises(
- TypeError, match='LayerWiseDistributedOptimizer received Float16 optimizer already'
+ TypeError, match='LayerWiseDistributedOptimizer expects base torch optimizers'
):
LayerWiseDistributedOptimizer([wrapped_optimizer], lw_config, pg_collection)
diff --git a/tests/unit_tests/test_muon_optimizer.py b/tests/unit_tests/test_muon_optimizer.py
deleted file mode 100644
index cc99f7a16e6..00000000000
--- a/tests/unit_tests/test_muon_optimizer.py
+++ /dev/null
@@ -1,670 +0,0 @@
-# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-
-import os
-
-import pytest
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from packaging.version import Version
-
-from megatron.core import parallel_state
-from megatron.core.distributed import DistributedDataParallel, DistributedDataParallelConfig
-from megatron.core.optimizer import OptimizerConfig
-from megatron.core.optimizer.muon import TensorParallelMuon, get_megatron_muon_optimizer
-from megatron.core.process_groups_config import ProcessGroupCollection
-from megatron.core.transformer import TransformerConfig
-from tests.unit_tests.test_utilities import Utils
-
-# Skip all tests in this file for LTS versions
-pytestmark = pytest.mark.skipif(
- Version(os.getenv('NVIDIA_PYTORCH_VERSION', "24.01")) <= Version("25.05"),
- reason="Skip muon optimizer for LTS test",
-)
-
-
-class Net(nn.Module):
- def __init__(self):
- super().__init__()
- self.fc1 = nn.Linear(80, 48)
- self.fc2 = nn.Linear(48, 32)
- self.fc3 = nn.Linear(32, 24)
- self.fc4 = nn.Linear(24, 16)
- self.fc5 = nn.Linear(16, 10)
-
- def forward(self, x):
- x = F.relu(self.fc1(x))
- x = F.relu(self.fc2(x))
- x = F.relu(self.fc3(x))
- x = F.relu(self.fc4(x))
- x = self.fc5(x)
- return x
-
-
-def test_muon_optimizer_smoke():
- """Smoke test for TensorParallelMuon optimizer."""
- # Create a simple linear model for testing
- model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda')
- model.requires_grad_(True)
- model.weight.data.fill_(1.0)
-
- # Create TensorParallelMuon optimizer
- optimizer = TensorParallelMuon(
- params=[model.weight],
- lr=0.01,
- momentum_beta=0.95,
- use_nesterov=True,
- weight_decay=0.01,
- use_decoupled_weight_decay=True,
- split_qkv=False,
- fp32_matmul_prec="medium",
- num_ns_steps=5,
- scale_mode="spectral",
- extra_scale_factor=1.0,
- pg_collection=None,
- mode="duplicated",
- )
-
- # Test basic properties
- assert optimizer is not None, "Optimizer should not be None"
- assert hasattr(optimizer, 'param_groups'), "Optimizer should have param_groups"
- assert len(optimizer.param_groups) > 0, "Optimizer should have at least one parameter group"
-
- # Test forward and backward pass
- input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda')
- output = model(input_tensor)
- loss = output.sum()
- loss.backward()
-
- # Store original weight
- original_weight = model.weight.data.clone()
-
- # Test optimizer step
- optimizer.step()
-
- # Verify weight was updated
- assert not torch.equal(
- model.weight.data, original_weight
- ), "Weight should be updated after optimizer step"
-
- # Test zero_grad
- optimizer.zero_grad()
- assert model.weight.grad is None or torch.all(
- model.weight.grad == 0
- ), "Gradients should be zeroed"
-
- # Test state_dict and load_state_dict
- state_dict = optimizer.state_dict()
- assert 'state' in state_dict, "State dict should contain state"
- assert 'param_groups' in state_dict, "State dict should contain param_groups"
-
- # Load state dict should not raise error
- optimizer.load_state_dict(state_dict)
-
-
-@pytest.mark.skipif(
- int(os.getenv('WORLD_SIZE', '1')) == 1, reason="Multi-rank test requires WORLD_SIZE > 1"
-)
-class TestMuonOptimizerMultiRank:
- """Test class for Muon optimizer with multi-rank setup."""
-
- @pytest.fixture(autouse=True)
- def setup_and_teardown(self):
- """Setup and teardown for each test."""
- Utils.initialize_model_parallel()
- yield
- Utils.destroy_model_parallel()
-
- def create_ddp_model(self, model):
- """Wrap model in DDP.
-
- Args:
- model: Model to wrap
-
- Returns:
- DDP-wrapped model
- """
- ddp_config = DistributedDataParallelConfig(use_distributed_optimizer=False)
- return DistributedDataParallel(
- TransformerConfig(num_attention_heads=1, num_layers=1), ddp_config, model
- )
-
- def test_get_megatron_muon_optimizer_smoke(self):
- """Smoke test for get_megatron_muon_optimizer function."""
- model = Net().bfloat16().cuda()
- model.requires_grad_(True)
- model = self.create_ddp_model(model)
-
- # Ensure all parameters require gradients
- for param in model.parameters():
- assert param.requires_grad, "All parameters should require gradients"
-
- # Create optimizer config for Muon
- optimizer_config = OptimizerConfig(
- optimizer='muon', # This will be changed internally to 'adam' for non-linear params
- lr=0.01,
- weight_decay=0.01,
- bf16=True,
- use_distributed_optimizer=False, # Muon doesn't support distributed optimizer
- muon_momentum=0.95,
- muon_use_nesterov=True,
- muon_fp32_matmul_prec="medium",
- muon_num_ns_steps=5,
- muon_scale_mode="spectral",
- muon_tp_mode="duplicated",
- )
-
- # Test creating the optimizer
- optimizer = get_megatron_muon_optimizer(
- config=optimizer_config,
- model_chunks=[model],
- use_gloo_process_groups=True,
- layer_wise_distributed_optimizer=False,
- )
-
- # Test basic properties
- assert optimizer is not None, "Optimizer should not be None"
- assert hasattr(optimizer, 'param_groups'), "Optimizer should have param_groups"
- assert hasattr(optimizer, 'chained_optimizers'), "Should be a ChainedOptimizer"
- assert len(optimizer.chained_optimizers) >= 1, "Should have at least one chained optimizer"
-
- # Test forward and backward pass
- input_tensor = torch.randn(16, 80, dtype=torch.bfloat16, device='cuda')
- output = model(input_tensor)
- loss = output.sum()
- loss.backward()
-
- # Store original parameters
- original_params = {}
- for name, param in model.named_parameters():
- original_params[name] = param.data.clone()
-
- # Test optimizer step
- optimizer.step()
-
- # Verify at least some parameters were updated
- params_updated = 0
- for name, param in model.named_parameters():
- if not torch.equal(param.data, original_params[name]):
- params_updated += 1
-
- assert params_updated > 0, "At least some parameters should be updated after optimizer step"
-
- # Test zero_grad
- optimizer.zero_grad()
- for param in model.parameters():
- assert param.grad is None or torch.all(
- param.grad == 0
- ), f"Gradients should be zeroed for all parameters"
-
- # Test state_dict and load_state_dict
- state_dict = optimizer.state_dict()
- assert isinstance(state_dict, list), "State dict should be a list"
-
- # Load state dict should not raise error
- optimizer.load_state_dict(state_dict)
-
- def test_get_megatron_muon_optimizer_validation(self):
- """Test validation logic for get_megatron_muon_optimizer."""
- model = torch.nn.Linear(100, 50, bias=False, dtype=torch.bfloat16, device='cuda')
- model.requires_grad_(True)
- model = self.create_ddp_model(model)
-
- # Test 1: Distributed optimizer should raise exception
- optimizer_config_dist = OptimizerConfig(
- optimizer='muon',
- lr=0.01,
- bf16=True,
- use_distributed_optimizer=True, # This should cause an exception
- )
-
- with pytest.raises(Exception, match='muon with dist optimizer is not supported'):
- get_megatron_muon_optimizer(config=optimizer_config_dist, model_chunks=[model])
-
- # Test 2: FP16 should raise exception
- optimizer_config_fp16 = OptimizerConfig(
- optimizer='muon',
- lr=0.01,
- fp16=True, # This should cause an exception
- use_distributed_optimizer=False,
- )
-
- with pytest.raises(Exception, match='muon with fp16 is not supported'):
- get_megatron_muon_optimizer(config=optimizer_config_fp16, model_chunks=[model])
-
- # Test 3: Invalid num_ns_steps should raise exception
- optimizer_config_invalid_ns = OptimizerConfig(
- optimizer='muon',
- lr=0.01,
- bf16=True,
- use_distributed_optimizer=False,
- muon_num_ns_steps=0, # This should cause an exception
- )
-
- with pytest.raises(ValueError, match='num_ns_steps must be at least 1'):
- get_megatron_muon_optimizer(config=optimizer_config_invalid_ns, model_chunks=[model])
-
- def test_get_megatron_muon_optimizer_layer_wise(self):
- """Test get_megatron_muon_optimizer with layer-wise distributed optimizer."""
- model = Net().bfloat16().cuda()
- model.requires_grad_(True)
- model = self.create_ddp_model(model)
-
- optimizer_config = OptimizerConfig(
- optimizer='muon',
- lr=0.01,
- weight_decay=0.01,
- bf16=True,
- use_distributed_optimizer=False,
- muon_momentum=0.95,
- muon_use_nesterov=True,
- muon_fp32_matmul_prec="medium",
- muon_num_ns_steps=5,
- muon_scale_mode="spectral",
- muon_tp_mode="duplicated",
- )
-
- # Test with layer_wise_distributed_optimizer=True
- optimizer = get_megatron_muon_optimizer(
- config=optimizer_config,
- model_chunks=[model],
- use_gloo_process_groups=True,
- layer_wise_distributed_optimizer=True,
- )
-
- # Verify it's a LayerWiseDistributedOptimizer
- from megatron.core.optimizer.layer_wise_optimizer import LayerWiseDistributedOptimizer
-
- assert isinstance(
- optimizer, LayerWiseDistributedOptimizer
- ), "Should return LayerWiseDistributedOptimizer"
-
- # Test forward and backward pass
- input_tensor = torch.randn(16, 80, dtype=torch.bfloat16, device='cuda')
- output = model(input_tensor)
- loss = output.sum()
- loss.backward()
-
- # Test optimizer step
- update_successful, grad_norm, num_zeros = optimizer.step()
-
- assert update_successful, "Optimizer step should be successful"
- assert grad_norm is not None or grad_norm is None, "Grad norm should be returned"
-
-
-@pytest.mark.parametrize("mode", ["duplicated", "blockwise", "distributed"])
-def test_muon_optimizer_different_modes_single_rank(mode):
- """Test TensorParallelMuon optimizer with different modes on single rank.
-
- When TP size is 1, all modes should produce the same result.
- """
- # Set random seed for reproducibility
- torch.manual_seed(42)
- torch.cuda.manual_seed(42)
-
- model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda')
- model.requires_grad_(True)
- model.weight.data.normal_(0, 0.02)
-
- optimizer = TensorParallelMuon(
- params=[model.weight],
- lr=0.01,
- momentum_beta=0.95,
- weight_decay=0.0, # Disable weight decay for deterministic comparison
- num_ns_steps=5,
- pg_collection=None,
- mode=mode,
- )
-
- # Use fixed input for deterministic results
- torch.manual_seed(42)
- input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda')
-
- output = model(input_tensor)
- loss = output.sum()
- loss.backward()
-
- original_weight = model.weight.data.clone()
- optimizer.step()
-
- # Verify weight was updated
- assert not torch.equal(
- model.weight.data, original_weight
- ), f"Weight should be updated with mode={mode}"
-
-
-@pytest.mark.skipif(
- int(os.getenv('WORLD_SIZE', '1')) == 1, reason="Multi-rank test requires WORLD_SIZE > 1"
-)
-class TestMuonOptimizerMultiRankTP:
- """Test class for Muon optimizer with multi-rank and tensor parallel setup."""
-
- @pytest.fixture(autouse=True)
- def setup_and_teardown(self):
- """Setup and teardown for each test with tensor parallel."""
- world = int(os.getenv('WORLD_SIZE', '1'))
- Utils.initialize_model_parallel(tensor_model_parallel_size=min(world, 2))
- yield
- Utils.destroy_model_parallel()
-
- def create_tp_model_and_optimizer(self, mode):
- """Create model with TP and optimizer.
-
- Args:
- mode: Muon optimizer mode
-
- Returns:
- tuple: (model, optimizer, pg_collection)
- """
- rank = int(os.getenv('RANK', '0'))
- pg_collection = ProcessGroupCollection.use_mpu_process_groups()
-
- # Create model with partition_dim for TP
- torch.manual_seed(42 + rank)
- model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda')
- model.requires_grad_(True)
- model.weight.data.normal_(0, 0.02)
- model.weight.partition_dim = 0 # Set partition dimension for TP
-
- optimizer = TensorParallelMuon(
- params=[model.weight],
- lr=0.01,
- momentum_beta=0.95,
- weight_decay=0.0,
- num_ns_steps=5,
- pg_collection=pg_collection,
- mode=mode,
- )
-
- return model, optimizer
-
- @pytest.mark.parametrize("mode", ["duplicated", "distributed"])
- def test_muon_optimizer_modes_multirank_same_result(self, mode):
- """Test that duplicated and distributed modes produce same results with TP > 1."""
- model, optimizer = self.create_tp_model_and_optimizer(mode)
-
- # Use fixed input for deterministic results
- torch.manual_seed(42)
- input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda')
-
- output = model(input_tensor)
- loss = output.sum()
- loss.backward()
-
- original_weight = model.weight.data.clone()
- optimizer.step()
-
- # Verify weight was updated
- assert not torch.equal(
- model.weight.data, original_weight
- ), f"Weight should be updated with mode={mode}"
-
- def test_muon_optimizer_blockwise_mode_different_result(self):
- """Test that blockwise mode produces different results than duplicated/distributed with TP > 1."""
- model, optimizer = self.create_tp_model_and_optimizer("blockwise")
-
- # Use fixed input for deterministic results
- torch.manual_seed(42)
- input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda')
-
- output = model(input_tensor)
- loss = output.sum()
- loss.backward()
-
- original_weight = model.weight.data.clone()
- optimizer.step()
-
- # Verify weight was updated
- assert not torch.equal(
- model.weight.data, original_weight
- ), "Weight should be updated with mode=blockwise"
-
-
-@pytest.mark.parametrize(
- "coefficient_type_and_steps", [("simple", 3), ("quintic", 5), ("polar_express", 8)]
-)
-def test_muon_optimizer_coefficient_types(coefficient_type_and_steps):
- """Test TensorParallelMuon optimizer with different coefficient types."""
- model = torch.nn.Linear(80, 40, bias=False, dtype=torch.float32, device='cuda')
- model.requires_grad_(True)
- model.weight.data.fill_(1.0)
-
- optimizer = TensorParallelMuon(
- params=[model.weight],
- lr=0.01,
- coefficient_type=coefficient_type_and_steps[0],
- num_ns_steps=coefficient_type_and_steps[1],
- pg_collection=None,
- mode="duplicated",
- )
-
- input_tensor = torch.randn(16, 80, dtype=torch.float32, device='cuda')
- output = model(input_tensor)
- loss = output.sum()
- loss.backward()
-
- original_weight = model.weight.data.clone()
- optimizer.step()
-
- assert not torch.equal(
- model.weight.data, original_weight
- ), f"Weight should be updated with coefficient_type={coefficient_type_and_steps[0]} and num_ns_steps={coefficient_type_and_steps[1]}"
-
-
-@pytest.mark.parametrize("scale_mode", ["spectral", "unit_rms_norm", "shape_scaling"])
-def test_muon_optimizer_scale_modes(scale_mode):
- """Test TensorParallelMuon optimizer with different scale modes."""
- model = torch.nn.Linear(60, 30, bias=False, dtype=torch.float32, device='cuda')
- model.requires_grad_(True)
- model.weight.data.fill_(1.0)
-
- optimizer = TensorParallelMuon(
- params=[model.weight],
- lr=0.01,
- scale_mode=scale_mode,
- num_ns_steps=5,
- pg_collection=None,
- mode="duplicated",
- )
-
- input_tensor = torch.randn(16, 60, dtype=torch.float32, device='cuda')
- output = model(input_tensor)
- loss = output.sum()
- loss.backward()
-
- original_weight = model.weight.data.clone()
- optimizer.step()
-
- assert not torch.equal(
- model.weight.data, original_weight
- ), f"Weight should be updated with scale_mode={scale_mode}"
-
-
-@pytest.mark.parametrize("use_nesterov", [True, False])
-def test_muon_optimizer_nesterov(use_nesterov):
- """Test TensorParallelMuon optimizer with and without Nesterov momentum."""
- model = torch.nn.Linear(50, 25, bias=False, dtype=torch.float32, device='cuda')
- model.requires_grad_(True)
- model.weight.data.fill_(1.0)
-
- optimizer = TensorParallelMuon(
- params=[model.weight],
- lr=0.01,
- momentum_beta=0.9,
- use_nesterov=use_nesterov,
- num_ns_steps=5,
- pg_collection=None,
- mode="duplicated",
- )
-
- input_tensor = torch.randn(16, 50, dtype=torch.float32, device='cuda')
- output = model(input_tensor)
- loss = output.sum()
- loss.backward()
-
- original_weight = model.weight.data.clone()
- optimizer.step()
-
- assert not torch.equal(
- model.weight.data, original_weight
- ), f"Weight should be updated with use_nesterov={use_nesterov}"
-
-
-def test_muon_optimizer_multiple_steps():
- """Test TensorParallelMuon optimizer across multiple optimization steps."""
- model = torch.nn.Linear(100, 50, bias=False, dtype=torch.float32, device='cuda')
- model.requires_grad_(True)
- model.weight.data.fill_(1.0)
-
- optimizer = TensorParallelMuon(
- params=[model.weight],
- lr=0.01,
- momentum_beta=0.95,
- weight_decay=0.01,
- num_ns_steps=5,
- pg_collection=None,
- mode="duplicated",
- )
-
- weights_history = [model.weight.data.clone()]
-
- for i in range(3):
- input_tensor = torch.randn(32, 100, dtype=torch.float32, device='cuda')
- output = model(input_tensor)
- loss = output.sum()
- loss.backward()
-
- optimizer.step()
- optimizer.zero_grad()
- weights_history.append(model.weight.data.clone())
-
- # Verify weights changed at each step
- for i in range(len(weights_history) - 1):
- assert not torch.equal(
- weights_history[i], weights_history[i + 1]
- ), f"Weight should change at step {i}"
-
-
-def test_muon_optimizer_qkv_split():
- """Test TensorParallelMuon optimizer with QKV splitting."""
- # Create a model with QKV-like parameter
- qkv_size = 3 * 64 * 16 # Combined Q, K, V dimensions, 16 heads x 64 per head
- hidden_size = 1024
- model = torch.nn.Linear(hidden_size, qkv_size, bias=False, dtype=torch.float32, device='cuda')
- model.requires_grad_(True)
- model.weight.data.fill_(1.0)
-
- # Mark parameter as QKV
- model.weight.is_qkv = True
-
- # QKV split shapes: [Q_size, K_size, V_size]
- qkv_split_shapes = (64, 64, 64)
-
- # Test with split_qkv=True
- optimizer_split = TensorParallelMuon(
- params=[model.weight],
- lr=0.01,
- split_qkv=True,
- is_qkv_fn=lambda p: getattr(p, 'is_qkv', False),
- qkv_split_shapes=qkv_split_shapes,
- num_ns_steps=5,
- pg_collection=None,
- mode="duplicated",
- )
-
- input_tensor = torch.randn(16, hidden_size, dtype=torch.float32, device='cuda')
- output = model(input_tensor)
- loss = output.sum()
- loss.backward()
-
- original_weight = model.weight.data.clone()
- optimizer_split.step()
- weight_with_split = model.weight.data.clone()
-
- assert not torch.equal(
- weight_with_split, original_weight
- ), "QKV weight should be updated with split_qkv=True"
-
- # Reset model and test with split_qkv=False
- model.weight.data.fill_(1.0)
- optimizer_no_split = TensorParallelMuon(
- params=[model.weight],
- lr=0.01,
- split_qkv=False,
- num_ns_steps=5,
- pg_collection=None,
- mode="duplicated",
- )
-
- output = model(input_tensor)
- loss = output.sum()
- loss.backward()
-
- optimizer_no_split.step()
- weight_without_split = model.weight.data.clone()
-
- assert not torch.equal(
- weight_without_split, original_weight
- ), "QKV weight should be updated with split_qkv=False"
-
- # Ensure the two results are different
- assert not torch.equal(
- weight_with_split, weight_without_split
- ), "Weights should be different between split_qkv=True and split_qkv=False"
-
-
-def test_muon_optimizer_extra_scale_factor():
- """Test TensorParallelMuon optimizer with different extra_scale_factor values."""
- model = torch.nn.Linear(80, 40, bias=False, dtype=torch.float32, device='cuda')
- model.requires_grad_(True)
- model.weight.data.fill_(1.0)
-
- optimizer = TensorParallelMuon(
- params=[model.weight],
- lr=0.01,
- extra_scale_factor=2.0,
- num_ns_steps=5,
- pg_collection=None,
- mode="duplicated",
- )
-
- input_tensor = torch.randn(16, 80, dtype=torch.float32, device='cuda')
- output = model(input_tensor)
- loss = output.sum()
- loss.backward()
-
- original_weight = model.weight.data.clone()
- optimizer.step()
-
- assert not torch.equal(
- model.weight.data, original_weight
- ), "Weight should be updated with extra_scale_factor"
-
-
-@pytest.mark.parametrize("num_ns_steps", [5, 15, 25])
-def test_muon_optimizer_num_ns_steps(num_ns_steps):
- """Test TensorParallelMuon optimizer with different numbers of Newton-Schulz steps."""
- model = torch.nn.Linear(60, 30, bias=False, dtype=torch.float32, device='cuda')
- model.requires_grad_(True)
- model.weight.data.fill_(1.0)
-
- optimizer = TensorParallelMuon(
- params=[model.weight],
- lr=0.01,
- coefficient_type="quintic",
- num_ns_steps=num_ns_steps,
- pg_collection=None,
- mode="duplicated",
- )
-
- input_tensor = torch.randn(16, 60, dtype=torch.float32, device='cuda')
- output = model(input_tensor)
- loss = output.sum()
- loss.backward()
-
- original_weight = model.weight.data.clone()
- optimizer.step()
-
- assert not torch.equal(
- model.weight.data, original_weight
- ), f"Weight should be updated with num_ns_steps={num_ns_steps}"
diff --git a/tests/unit_tests/test_optimizer.py b/tests/unit_tests/test_optimizer.py
index 2488900ba72..56af8545042 100644
--- a/tests/unit_tests/test_optimizer.py
+++ b/tests/unit_tests/test_optimizer.py
@@ -106,10 +106,10 @@ def test_get_param_groups_no_overrides(mock_get_world_size):
def test_get_param_groups_default_overrides(mock_get_world_size):
"""Test that the default overrides are applied to the parameter groups."""
net = Net()
- # NOTE: to get legacy default overrides, supply None.
opt_config = OptimizerConfig(optimizer='adam', lr=0.01)
- check_config_overrides_consistency(opt_config, None)
- param_groups = _get_param_groups([net], opt_config, None)
+ config_overrides = get_standard_config_overrides(opt_config)
+ check_config_overrides_consistency(opt_config, config_overrides)
+ param_groups = _get_param_groups([net], opt_config, config_overrides)
assert len(param_groups) == 2
pg0, pg1 = param_groups
wd_mults = {pg0['wd_mult'], pg1['wd_mult']}
diff --git a/tests/unit_tests/test_optimizer_state_offloading.py b/tests/unit_tests/test_optimizer_state_offloading.py
new file mode 100644
index 00000000000..baaab355182
--- /dev/null
+++ b/tests/unit_tests/test_optimizer_state_offloading.py
@@ -0,0 +1,337 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+
+"""Unit tests for OptimizerStateOffloader."""
+
+import pytest
+import torch
+import torch.nn as nn
+
+from megatron.core.distributed import DistributedDataParallel, DistributedDataParallelConfig
+from megatron.core.optimizer import OptimizerConfig, get_megatron_optimizer
+from megatron.core.transformer import TransformerConfig
+from tests.unit_tests.test_utilities import Utils
+
+try:
+ from transformer_engine.pytorch.optimizers import FusedAdam # noqa: F401
+
+ TE_FUSED_ADAM_AVAILABLE = True
+except ImportError:
+ TE_FUSED_ADAM_AVAILABLE = False
+
+
+class SimpleModel(nn.Module):
+ """Simple model for testing."""
+
+ def __init__(self, hidden_size=256):
+ super().__init__()
+ self.fc1 = nn.Linear(hidden_size, hidden_size)
+ self.fc2 = nn.Linear(hidden_size, hidden_size)
+
+ def forward(self, x):
+ return self.fc2(torch.relu(self.fc1(x)))
+
+
+def create_model_and_optimizer(hidden_size=256, offload_optimizer_states=True, **optimizer_kwargs):
+ """Helper to create model and optimizer for tests."""
+ model = SimpleModel(hidden_size=hidden_size).bfloat16().cuda()
+ ddp_config = DistributedDataParallelConfig(use_distributed_optimizer=True)
+ model = DistributedDataParallel(
+ TransformerConfig(num_attention_heads=1, num_layers=1), ddp_config, model
+ )
+
+ default_config = dict(
+ optimizer='adam',
+ bf16=True,
+ lr=0.001,
+ use_distributed_optimizer=True,
+ offload_optimizer_states=offload_optimizer_states,
+ )
+ default_config.update(optimizer_kwargs)
+
+ optimizer_config = OptimizerConfig(**default_config)
+ optim = get_megatron_optimizer(optimizer_config, [model])
+ return model, optim
+
+
+def run_forward_backward_step(model, optim, hidden_size=256):
+ """Run a single forward-backward-step cycle."""
+ input_tensor = torch.randn(8, hidden_size, dtype=torch.bfloat16, device='cuda')
+ output = model(input_tensor)
+ output.sum().backward()
+ optim.step()
+ optim.zero_grad()
+
+
+# =============================================================================
+# Test 1: Basic OptimizerStateOffloader Initialization
+# =============================================================================
+@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam")
+def test_offloader_initialization():
+ """Test that OptimizerStateOffloader initializes correctly."""
+ Utils.initialize_model_parallel()
+ model, optim = create_model_and_optimizer()
+ dist_optim = optim.chained_optimizers[0]
+
+ # Offloader is created in __init__ when offload_optimizer_states=True
+ assert dist_optim._state_offloader is not None
+ offloader = dist_optim._state_offloader
+
+ # Verify offloader properties
+ assert offloader.adam_optimizer is not None
+ assert offloader._d2h_stream is not None
+ assert offloader._h2d_stream is not None
+ assert offloader._offloaded is False
+
+ # Before first step, optimizer states are not initialized yet
+ assert offloader._optimizer_states_initialized is False
+
+ # Run one step to initialize optimizer states
+ run_forward_backward_step(model, optim)
+
+ # After first step, optimizer states should be marked as initialized
+ assert offloader._optimizer_states_initialized is True
+ Utils.destroy_model_parallel()
+
+
+# =============================================================================
+# Test 2: Early Master Weight Offloading Before First Step
+# =============================================================================
+@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam")
+def test_early_master_weight_offloading():
+ """Test that master weights can be offloaded before the first optimizer step."""
+ Utils.initialize_model_parallel()
+ model, optim = create_model_and_optimizer()
+ dist_optim = optim.chained_optimizers[0]
+
+ # Offloader is created in __init__
+ assert dist_optim._state_offloader is not None
+ offloader = dist_optim._state_offloader
+
+ # Before first step, optimizer states are not initialized
+ assert offloader._optimizer_states_initialized is False
+
+ # Capture original master weights before offload
+ original_master_weights = []
+ for group in dist_optim.shard_fp32_from_float16_groups:
+ group_weights = [tensor.clone() for tensor in group]
+ original_master_weights.append(group_weights)
+
+ # Offload before first step - should only offload master weights
+ offloader.offload()
+ offloader.release_gpu_memory()
+ torch.cuda.synchronize()
+
+ # Verify master weights were offloaded (storage resized to 0)
+ for group in dist_optim.shard_fp32_from_float16_groups:
+ for tensor in group:
+ assert tensor.untyped_storage().size() == 0, "Master weight should be offloaded"
+
+ # Reload master weights
+ offloader.reload()
+ offloader.sync_before_step()
+
+ # Verify master weights match after reload
+ for group_idx, group in enumerate(dist_optim.shard_fp32_from_float16_groups):
+ for param_idx, tensor in enumerate(group):
+ original = original_master_weights[group_idx][param_idx]
+ torch.testing.assert_close(
+ tensor,
+ original,
+ msg=f"Master weight [{group_idx}][{param_idx}] mismatch after offload/reload",
+ )
+
+ # Now run a step and verify optimizer states can be offloaded after
+ run_forward_backward_step(model, optim)
+ assert offloader._optimizer_states_initialized is True
+
+ Utils.destroy_model_parallel()
+
+
+# =============================================================================
+# Test 3: Offload and Reload Correctness
+# =============================================================================
+@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam")
+@pytest.mark.parametrize("offload_optimizer_states", [True, False])
+@pytest.mark.parametrize("offload_master_weights", [True, False])
+def test_offload_reload_correctness(offload_optimizer_states, offload_master_weights):
+ """Test that offload/reload preserves optimizer state values."""
+ if not offload_optimizer_states and not offload_master_weights:
+ pytest.skip("At least one offload type required")
+
+ Utils.initialize_model_parallel()
+ model, optim = create_model_and_optimizer()
+ dist_optim = optim.chained_optimizers[0]
+
+ # Run steps to build up optimizer state
+ for _ in range(3):
+ run_forward_backward_step(model, optim)
+
+ offloader = dist_optim._state_offloader
+
+ # Capture original states before offload
+ original_states = {}
+ for param, state in offloader.adam_optimizer.state.items():
+ original_states[param] = {
+ k: v.clone() for k, v in state.items() if isinstance(v, torch.Tensor)
+ }
+
+ # Offload
+ offloader.offload(
+ offload_optimizer_states=offload_optimizer_states,
+ offload_master_weights=offload_master_weights,
+ )
+
+ # Release GPU memory
+ offloader.release_gpu_memory()
+ torch.cuda.synchronize()
+
+ # Reload
+ offloader.reload()
+ offloader.sync_before_step()
+
+ # Verify states match after reload
+ for param, state in offloader.adam_optimizer.state.items():
+ if param in original_states:
+ for key, original_tensor in original_states[param].items():
+ if key in state and isinstance(state[key], torch.Tensor):
+ reloaded_tensor = state[key]
+ assert reloaded_tensor.device.type == 'cuda', f"State {key} should be on GPU"
+ torch.testing.assert_close(
+ reloaded_tensor,
+ original_tensor,
+ msg=f"State {key} mismatch after offload/reload",
+ )
+ Utils.destroy_model_parallel()
+
+
+# =============================================================================
+# Test 4: GPU Memory Release Verification
+# =============================================================================
+@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam")
+def test_gpu_memory_release():
+ """Test that GPU memory is actually freed after release_gpu_memory()."""
+ Utils.initialize_model_parallel()
+ # Use larger model for measurable memory impact
+ model, optim = create_model_and_optimizer(hidden_size=1024)
+ dist_optim = optim.chained_optimizers[0]
+
+ # Initialize optimizer states
+ run_forward_backward_step(model, optim, hidden_size=1024)
+
+ offloader = dist_optim._state_offloader
+
+ # Measure memory before offload
+ torch.cuda.synchronize()
+ torch.cuda.empty_cache()
+ memory_before = torch.cuda.memory_allocated()
+
+ # Offload and release
+ offloader.offload()
+ offloader.release_gpu_memory()
+
+ # Wait for async operations
+ torch.cuda.synchronize()
+ torch.cuda.empty_cache()
+ memory_after = torch.cuda.memory_allocated()
+
+ # Memory should decrease
+ memory_freed = memory_before - memory_after
+ assert memory_freed > 0, f"Expected memory to be freed, but got {memory_freed} bytes difference"
+ Utils.destroy_model_parallel()
+
+
+# =============================================================================
+# Test 5: Multiple Offload/Reload Cycles
+# =============================================================================
+@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam")
+def test_multiple_offload_reload_cycles():
+ """Test that multiple offload/reload cycles work correctly."""
+ Utils.initialize_model_parallel()
+ model, optim = create_model_and_optimizer()
+ dist_optim = optim.chained_optimizers[0]
+
+ # Initialize
+ run_forward_backward_step(model, optim)
+
+ offloader = dist_optim._state_offloader
+
+ # Run multiple cycles
+ for cycle in range(5):
+ # Offload
+ offloader.offload()
+ offloader.release_gpu_memory()
+
+ # Reload
+ offloader.reload()
+ offloader.sync_before_step()
+
+ # Run optimizer step
+ run_forward_backward_step(model, optim)
+
+ # Verify model can still produce valid outputs
+ input_tensor = torch.randn(8, 256, dtype=torch.bfloat16, device='cuda')
+ output = model(input_tensor)
+ assert not output.isnan().any(), "Model output contains NaN after multiple cycles"
+ Utils.destroy_model_parallel()
+
+
+# =============================================================================
+# Test 6: Training Correctness with Offloading
+# =============================================================================
+@pytest.mark.skipif(not TE_FUSED_ADAM_AVAILABLE, reason="Requires TE FusedAdam")
+def test_training_correctness_with_offloading():
+ """Test that training with offloading produces same results as without."""
+ Utils.initialize_model_parallel()
+ torch.manual_seed(42)
+
+ # Model 1: with offloading
+ model1, optim1 = create_model_and_optimizer(offload_optimizer_states=True, lr=0.01)
+
+ # Model 2: without offloading (reference)
+ torch.manual_seed(42)
+ model2, optim2 = create_model_and_optimizer(offload_optimizer_states=False, lr=0.01)
+
+ # Train both models
+ n_steps = 10
+ torch.manual_seed(123)
+ dist_optim1 = optim1.chained_optimizers[0]
+
+ # Offloader is created in __init__ when offload_optimizer_states=True
+ assert dist_optim1._state_offloader is not None
+ offloader = dist_optim1._state_offloader
+
+ for step in range(n_steps):
+ input_tensor = torch.randn(8, 256, dtype=torch.bfloat16, device='cuda')
+
+ # Model 1 with offloading
+ # Offload states (master weights can be offloaded from the start,
+ # optimizer states will be skipped until after first step)
+ offloader.offload()
+ offloader.release_gpu_memory()
+
+ output1 = model1(input_tensor)
+ loss1 = output1.sum()
+ loss1.backward()
+
+ offloader.reload()
+ offloader.sync_before_step()
+ optim1.step()
+ optim1.zero_grad()
+
+ # Model 2 without offloading
+ output2 = model2(input_tensor)
+ loss2 = output2.sum()
+ loss2.backward()
+ optim2.step()
+ optim2.zero_grad()
+
+ # Compare final model weights
+ for (n1, p1), (n2, p2) in zip(model1.named_parameters(), model2.named_parameters()):
+ torch.testing.assert_close(
+ p1.data,
+ p2.data,
+ atol=1e-5,
+ rtol=1e-4,
+ msg=f"Parameter {n1} mismatch between offloaded and non-offloaded training",
+ )
+ Utils.destroy_model_parallel()
diff --git a/tests/unit_tests/test_parallel_state.py b/tests/unit_tests/test_parallel_state.py
index 21dc740cdf4..e7aa2fe4927 100644
--- a/tests/unit_tests/test_parallel_state.py
+++ b/tests/unit_tests/test_parallel_state.py
@@ -507,9 +507,9 @@ def golden_rank_result_from_past_code(
"world_size, tp_size, cp_size, dp_size",
[(8, 1, 2, 4), (8, 1, 1, 8)], # 8 GPUs, 1 TP, 2 CP, 4 DP # 8 GPUs, 1 TP, 1 CP, 8 DP
)
-def test_hybrid_dp_cp_groups(world_size, tp_size, cp_size, dp_size):
+def test_dynamic_dp_cp_groups(world_size, tp_size, cp_size, dp_size):
"""
- Test that hybrid DPxCP groups are created correctly.
+ Test that dynamic DPxCP groups are created correctly.
"""
Utils.destroy_model_parallel()
@@ -520,13 +520,13 @@ def test_hybrid_dp_cp_groups(world_size, tp_size, cp_size, dp_size):
Utils.initialize_model_parallel(
tensor_model_parallel_size=tp_size,
context_parallel_size=cp_size,
- hybrid_context_parallel=True,
+ dynamic_context_parallel=True,
)
dp_cp_size = ps.get_data_parallel_world_size(with_context_parallel=True)
- group_sizes = [2**i for i in range(int(log2(dp_cp_size)))][1:]
+ group_sizes = [2**i for i in range(int(log2(dp_cp_size)))]
for group_size in group_sizes:
- group = ps.get_hybrid_data_context_parallel_groups(group_size=group_size)
+ group = ps.get_dynamic_data_context_parallel_groups(group_size=group_size)
assert group.size() == group_size
Utils.destroy_model_parallel()
diff --git a/tests/unit_tests/test_sequence_packing.py b/tests/unit_tests/test_sequence_packing.py
new file mode 100644
index 00000000000..60316b0236e
--- /dev/null
+++ b/tests/unit_tests/test_sequence_packing.py
@@ -0,0 +1,479 @@
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+import random
+from types import SimpleNamespace
+
+import numpy as np
+import pytest
+import torch
+
+from megatron.core import parallel_state
+from megatron.core.datasets.data_schedule import (
+ get_batch_on_this_rank_for_sequence_packing,
+ wrap_data_iterator,
+)
+from megatron.core.rerun_state_machine import RerunDataIterator
+from megatron.training.global_vars import unset_global_variables
+from tests.unit_tests.test_utilities import Utils
+
+
+class MockVariableLengthSequencePackingDataIterator:
+ """
+ Mock data iterator for testing get_batch_on_this_rank_for_sequence_packing.
+
+ Generates variable-length (THD format) packed sequences with deterministic
+ data for verification across parallel ranks.
+ """
+
+ def __init__(
+ self,
+ total_seq_length: int,
+ sequence_lengths: list,
+ local_cp_size: int = None,
+ device: str = "cuda",
+ seed: int = 42,
+ ):
+ """
+ Args:
+ total_seq_length: Total length of packed sequences
+ sequence_lengths: List of individual sequence lengths (variable-length).
+ If None, generates random variable lengths.
+ device: Device to create tensors on
+ seed: Random seed for reproducibility
+ """
+ self.total_seq_length = total_seq_length
+ self.sequence_lengths = sequence_lengths
+ self.local_cp_size = local_cp_size
+ self.device = device
+ self.seed = seed
+ assert (
+ sum(self.sequence_lengths) == total_seq_length
+ ), f"Sequence lengths sum {sum(self.sequence_lengths)} != total {total_seq_length}"
+
+ def __iter__(self):
+ """Interface for the data iterator."""
+ return self
+
+ def __next__(self):
+ """Generate a mock batch with variable-length THD format."""
+ dev = self.device
+ torch.manual_seed(self.seed)
+ torch.cuda.manual_seed(self.seed)
+
+ tokens = torch.randint(0, 16384, (self.total_seq_length,), dtype=torch.int64, device=dev)
+
+ # Create position_ids that reset for each sequence (THD format)
+ position_ids = []
+ for seq_len in self.sequence_lengths:
+ position_ids.extend(range(seq_len))
+ position_ids = torch.tensor(position_ids, dtype=torch.int64, device=dev)
+
+ # Labels are tokens shifted by 1 for easy verification
+ labels = tokens + 1
+
+ # Loss mask: 1.0 for all positions except padding (none here)
+ loss_mask = torch.ones(self.total_seq_length, dtype=torch.float32, device=dev)
+
+ # Create cu_seqlens for variable-length packed sequences
+ cu_seqlens = [0]
+ for seq_len in self.sequence_lengths:
+ cu_seqlens.append(cu_seqlens[-1] + seq_len)
+ cu_seqlens = torch.tensor(cu_seqlens, dtype=torch.int32, device=dev)
+ cu_seqlens_padded = cu_seqlens.clone()
+
+ max_seqlen = torch.tensor([max(self.sequence_lengths)], dtype=torch.int32, device=dev)
+
+ batch = {
+ "tokens": tokens,
+ "position_ids": position_ids,
+ "labels": labels,
+ "loss_mask": loss_mask,
+ "cu_seqlens": cu_seqlens,
+ "cu_seqlens_padded": cu_seqlens_padded,
+ "max_seqlen": max_seqlen,
+ }
+
+ if not (
+ parallel_state.is_pipeline_first_stage(ignore_virtual=True)
+ or parallel_state.is_pipeline_last_stage(ignore_virtual=True)
+ ):
+ batch["tokens"] = None
+ batch["position_ids"] = None
+ batch["labels"] = None
+ batch["loss_mask"] = None
+
+ if self.local_cp_size is not None:
+ batch["local_cp_size"] = torch.tensor(
+ [self.local_cp_size], dtype=torch.int32, device=dev
+ )
+
+ return batch
+
+
+def _gather_tensor_from_tp_group(tensor):
+ """Gather tensors from all TP ranks for comparison."""
+ assert tensor is not None, "Tensor should not be None"
+ tp_size = parallel_state.get_tensor_model_parallel_world_size()
+ gathered = [torch.zeros_like(tensor) for _ in range(tp_size)]
+ torch.distributed.all_gather(
+ gathered, tensor, group=parallel_state.get_tensor_model_parallel_group()
+ )
+ return gathered
+
+
+def _gather_tensor_from_all_ranks(tensor):
+ """Gather tensors from all PP ranks for comparison."""
+ assert tensor is not None, "Tensor should not be None"
+ if type(tensor) is int:
+ tensor = torch.tensor(tensor, dtype=torch.int32, device=torch.cuda.current_device())
+ gathered = [torch.zeros_like(tensor) for _ in range(torch.distributed.get_world_size())]
+ torch.distributed.all_gather(gathered, tensor)
+ return gathered
+
+
+@pytest.mark.parametrize(
+ ("tp", "pp", "cp"),
+ [
+ (1, 1, 1), # Basic case: no parallelism
+ (2, 1, 1), # Tensor parallel only
+ (1, 2, 1), # Pipeline parallel only
+ (2, 2, 1), # TP + PP
+ (1, 1, 2), # CP only
+ (2, 1, 2), # TP + CP
+ (1, 2, 2), # PP + CP
+ (1, 4, 1), # Has middle pp stage
+ ],
+)
+def test_get_batch_on_this_rank_for_sequence_packing(tp, pp, cp):
+ """
+ Test get_batch_on_this_rank_for_sequence_packing function with variable-length THD format.
+
+ This test verifies:
+ 1. TP ranks: All ranks within a TP group receive identical data after broadcast
+ 2. PP ranks: Middle PP ranks have the same packed_seq_params as first/last stages
+ 3. CP ranks: Data is correctly partitioned with proper shape and values
+ 4. Variable-length (THD) format: Different sequence lengths are handled correctly
+ """
+ args = SimpleNamespace()
+ args.tensor_model_parallel_size = tp
+ args.pipeline_model_parallel_size = pp
+ args.context_parallel_size = cp
+ args.virtual_pipeline_model_parallel_size = None
+ args.data_parallel_size = 8 // (tp * pp * cp)
+ args.seq_length = 8192
+
+ # Skip invalid configurations
+ if args.data_parallel_size < 1:
+ raise ValueError(f"Invalid config: tp={tp}, pp={pp}, cp={cp} exceeds world size 8")
+
+ # Initialize model parallel
+ Utils.initialize_model_parallel(tp, pp, None, context_parallel_size=cp)
+
+ try:
+ # Create mock data iterator with variable-length sequences
+ # Only TP rank 0 needs the iterator; other TP ranks pass None
+ tp_rank = parallel_state.get_tensor_model_parallel_rank()
+ if tp_rank == 0:
+ # Use deterministic seed based on DP rank so same data within TP/PP/CP group
+ dp_rank = parallel_state.get_data_parallel_rank()
+ sequence_lengths = [1024, 2048, 512, 1536, 3072]
+ assert (
+ sum(sequence_lengths) == args.seq_length
+ ), f"Sequence lengths sum {sum(sequence_lengths)} != total {args.seq_length}"
+ data_iterator = iter(
+ MockVariableLengthSequencePackingDataIterator(
+ total_seq_length=args.seq_length,
+ sequence_lengths=sequence_lengths, # Variable lengths, sum=8192
+ seed=42 + dp_rank, # Same seed within PP/CP group
+ )
+ )
+ else:
+ # Non-TP-rank-0 ranks don't need the iterator
+ data_iterator = None
+
+ # Call the function under test
+ result = get_batch_on_this_rank_for_sequence_packing(
+ data_iterator=data_iterator, mtp_on_this_rank=False, vp_stage=None
+ )
+
+ # Unpack the result
+ tokens, labels, loss_mask, attention_mask, position_ids, packed_seq_params = result
+
+ # Get parallel state info
+ tp_rank = parallel_state.get_tensor_model_parallel_rank()
+ pp_rank = parallel_state.get_pipeline_model_parallel_rank()
+ cp_rank = parallel_state.get_context_parallel_rank()
+ is_first_stage = parallel_state.is_pipeline_first_stage(ignore_virtual=True)
+ is_last_stage = parallel_state.is_pipeline_last_stage(ignore_virtual=True)
+ is_first_or_last = is_first_stage or is_last_stage
+
+ # =====================================================================
+ # TEST 1: Verify data based on pipeline stage
+ # =====================================================================
+ if is_first_stage:
+ assert tokens is not None, "First stage should have tokens"
+ assert position_ids is not None, "First stage should have position_ids"
+ assert tokens.dim() == 2, "Tokens should be 2D (batch, seq)"
+ assert position_ids.dim() == 2, "Position IDs should be 2D (batch, seq)"
+ assert tokens.size(0) == 1, "batch should be 1 in THD format"
+ assert position_ids.size(0) == 1, "batch should be 1 in THD format"
+ else:
+ assert tokens is None, "Non-first stage should not have tokens"
+ assert position_ids is None, "Non-first stage should not have position_ids"
+
+ if is_last_stage:
+ assert labels is not None, "Last stage should have labels"
+ assert loss_mask is not None, "Last stage should have loss_mask"
+ assert labels.dim() == 2, "Labels should be 2D (batch, seq)"
+ assert loss_mask.dim() == 2, "Loss mask should be 2D (batch, seq)"
+ assert labels.size(0) == 1, "batch should be 1 in THD format"
+ assert loss_mask.size(0) == 1, "batch should be 1 in THD format"
+ else:
+ assert labels is None, "Non-last stage should not have labels"
+ assert loss_mask is None, "Non-last stage should not have loss_mask"
+
+ # =====================================================================
+ # TEST 2: Verify all ranks have consistent packed_seq_params
+ # =====================================================================
+ assert packed_seq_params is not None
+ assert packed_seq_params.qkv_format == "thd"
+
+ test_keys = [
+ "cu_seqlens_q",
+ "cu_seqlens_q_padded",
+ "max_seqlen_q",
+ "cu_seqlens_kv",
+ "cu_seqlens_kv_padded",
+ "max_seqlen_kv",
+ ]
+ for key in test_keys:
+ tensor = getattr(packed_seq_params, key)
+ assert tensor is not None
+ gathered_tensor = _gather_tensor_from_all_ranks(tensor)
+ for i in range(1, len(gathered_tensor)):
+ assert torch.equal(
+ gathered_tensor[0], gathered_tensor[i]
+ ), f"Rank 0 and rank {i} have different {key}"
+
+ # =====================================================================
+ # TEST 3: Verify TP ranks receive identical data after broadcast
+ # =====================================================================
+ if tp > 1:
+ test_tensors = []
+ if is_first_stage:
+ test_tensors.extend([tokens, position_ids])
+ if is_last_stage:
+ test_tensors.extend([labels, loss_mask])
+
+ for tensor in test_tensors:
+ gathered_tensors = _gather_tensor_from_tp_group(tensor)
+ for i in range(1, tp):
+ assert torch.equal(
+ gathered_tensors[0], gathered_tensors[i]
+ ), f"TP rank 0 and rank {i} have different data"
+
+ # =====================================================================
+ # TEST 4: Verify CP partitioning
+ # =====================================================================
+ if cp > 1:
+ # With CP, the sequence should be partitioned
+ expected_seq_len = args.seq_length // cp
+
+ if is_first_stage:
+ actual_seq_len = tokens.shape[1]
+ assert (
+ actual_seq_len == expected_seq_len
+ ), f"CP partitioned tokens have wrong shape: {actual_seq_len} != {expected_seq_len}"
+
+ # Verify labels only if all CP ranks are at last stage
+ if is_last_stage:
+ actual_seq_len = labels.shape[1]
+ assert (
+ actual_seq_len == expected_seq_len
+ ), f"CP partitioned labels have wrong shape: {actual_seq_len} != {expected_seq_len}"
+
+ finally:
+ Utils.destroy_model_parallel()
+ unset_global_variables()
+
+
+@pytest.mark.parametrize(
+ ("tp", "pp", "cp", "vpp", "scheduler_type"),
+ [
+ (1, 1, 8, None, "dp_balanced"),
+ (2, 1, 4, None, "dp_balanced"),
+ (2, 4, 1, None, "dp_balanced"),
+ (2, 2, 1, None, "dp_balanced"),
+ (1, 4, 1, 4, "dp_balanced"),
+ ],
+)
+def test_wrap_dataloader(tp, pp, cp, vpp, scheduler_type):
+ '''
+ Test wrap_dataloader function with different scheduler types.
+ '''
+ args = SimpleNamespace()
+ args.tensor_model_parallel_size = tp
+ args.pipeline_model_parallel_size = pp
+ args.context_parallel_size = cp
+ args.virtual_pipeline_model_parallel_size = None
+ args.data_parallel_size = 8 // (tp * pp * cp)
+ args.seq_length = 8192
+ args.max_seqlen_per_dp_cp_rank = 8192
+
+ # Skip invalid configurations
+ if args.data_parallel_size < 1:
+ raise ValueError(f"Invalid config: tp={tp}, pp={pp}, cp={cp} exceeds world size 8")
+
+ def _create_single_sample(seq_len):
+ # hard code the padding size to 16
+ pad_size = 16
+ seq_len_padded = ((seq_len + pad_size - 1) // pad_size) * pad_size
+ device = torch.device("cuda", torch.cuda.current_device())
+ tokens = torch.randint(0, 128, (seq_len_padded,), dtype=torch.int64, device=device)
+ labels = tokens + 1
+ position_ids = torch.arange(seq_len_padded, dtype=torch.int64, device=device)
+ loss_mask = torch.ones(seq_len_padded, dtype=torch.float32, device=device)
+ loss_mask[0:seq_len] = 1
+ loss_mask[seq_len:] = 0
+ cu_seqlens = torch.tensor([0, seq_len_padded], dtype=torch.int32, device=device)
+
+ return {
+ 'tokens': tokens,
+ 'labels': labels,
+ 'loss_mask': loss_mask,
+ 'position_ids': position_ids,
+ 'cu_seqlens': cu_seqlens,
+ }
+
+ # Initialize model parallel
+ Utils.initialize_model_parallel(tp, pp, vpp, context_parallel_size=cp)
+
+ global_batch_size = 64
+ micro_batch_size = 1
+ nums = [random.randint(2048, args.seq_length) for _ in range(global_batch_size)] # 64 sequences
+
+ config = SimpleNamespace()
+ config.max_seqlen_per_dp_cp_rank = args.max_seqlen_per_dp_cp_rank
+ config.microbatch_group_size_per_vp_stage = pp
+ config.virtual_pipeline_model_parallel_size = vpp
+ config.sequence_packing_scheduler = scheduler_type
+
+ dp_rank = parallel_state.get_data_parallel_rank()
+ dp_size = parallel_state.get_data_parallel_world_size()
+
+ pp_rank = parallel_state.get_pipeline_model_parallel_rank()
+ tp_rank = parallel_state.get_tensor_model_parallel_rank()
+
+ is_pp_first = pp_rank == 0
+ is_pp_last = pp_rank == pp - 1
+ is_pp_first_or_last = is_pp_first or is_pp_last
+ is_tp_first = tp_rank == 0
+
+ num_micro_batches_old = global_batch_size // micro_batch_size // dp_size
+
+ if is_tp_first and (is_pp_first or is_pp_last):
+ samples = [
+ _create_single_sample(num)
+ for num in nums[dp_rank * num_micro_batches_old : (dp_rank + 1) * num_micro_batches_old]
+ ]
+ data_iterator = RerunDataIterator(iter(samples))
+ else:
+ data_iterator = None
+
+ if is_tp_first:
+ if vpp is not None and vpp > 1:
+ if is_pp_first:
+ data_iterator = [data_iterator] + [None for _ in range(vpp - 1)]
+ elif is_pp_last:
+ data_iterator = [None for _ in range(vpp - 1)] + [data_iterator]
+ else:
+ data_iterator = [None for _ in range(vpp)]
+ try:
+ # Call the function under test
+ (
+ new_data_iterator,
+ num_micro_batches,
+ num_total_tokens_this_global_batch,
+ sequence_square_sum_this_global_batch,
+ ) = wrap_data_iterator(data_iterator, config, num_micro_batches_old)
+
+ # check the result
+ assert type(num_micro_batches) is int
+ assert (
+ type(num_total_tokens_this_global_batch) is float
+ or type(num_total_tokens_this_global_batch) is np.float32
+ )
+ assert (
+ type(sequence_square_sum_this_global_batch) is float
+ or type(sequence_square_sum_this_global_batch) is np.float32
+ )
+
+ def _check_batch(batch_all, batch_keys):
+ for batch in batch_all:
+ assert set(batch_keys) <= set(
+ batch.keys()
+ ), f"batch keys: {set(batch.keys())} missing {set(batch_keys) - set(batch.keys())}"
+ for key in batch_keys:
+ assert batch[key] is not None
+
+ if is_tp_first:
+ # CHECK KEYS
+ batch_keys = ["cu_seqlens", "max_seqlen", "cu_seqlens_padded"]
+ if vpp is not None and vpp > 1:
+ # check metadata for all stages (save batches to avoid re-consuming iterators)
+ all_stage_batches = []
+ for temp_data_iterator in new_data_iterator:
+ stage_batch = [next(temp_data_iterator) for _ in range(num_micro_batches)]
+ all_stage_batches.append(stage_batch)
+ _check_batch(stage_batch, batch_keys)
+
+ # check for first or last stage on first or last pp rank
+ if is_pp_first_or_last:
+ batch_all = all_stage_batches[0] if is_pp_first else all_stage_batches[-1]
+ batch_keys += ["tokens", "position_ids", "labels", "loss_mask"]
+ _check_batch(batch_all, batch_keys)
+ else:
+ # non-VPP: single iterator
+ batch_all = [next(new_data_iterator) for _ in range(num_micro_batches)]
+ if is_pp_first_or_last:
+ batch_keys += ["tokens", "position_ids", "labels", "loss_mask"]
+ _check_batch(batch_all, batch_keys)
+
+ # CHECK TOKEN SUM ON FIRST OR LAST PP RANK
+ # Note: data_iterator is consumed by wrap_data_iterator, new_data_iterator is consumed above.
+ # Use `samples` for before-wrap, reuse `batch_all` from the check above for after-wrap.
+ if is_pp_first_or_last:
+ # Compute token sum before wrap
+ token_sum_before = torch.tensor(0, dtype=torch.int64, device='cuda')
+ for sample in samples:
+ token_sum_before += sample['tokens'].long().sum()
+
+ # Compute token sum after wrap (batch_all already collected above with tokens)
+ token_sum_after = torch.tensor(0, dtype=torch.int64, device='cuda')
+ for batch in batch_all:
+ token_sum_after += batch['tokens'].long().sum()
+
+ # Reduce sum across dp_cp group and verify equality
+ dp_cp_group = parallel_state.get_data_parallel_group(with_context_parallel=False)
+ torch.distributed.all_reduce(
+ token_sum_before, op=torch.distributed.ReduceOp.SUM, group=dp_cp_group
+ )
+ torch.distributed.all_reduce(
+ token_sum_after, op=torch.distributed.ReduceOp.SUM, group=dp_cp_group
+ )
+
+ assert (
+ token_sum_before == token_sum_after
+ ), f"Token sum mismatch: before={token_sum_before.item()}, after={token_sum_after.item()}"
+
+ else:
+ if vpp is not None and vpp > 1:
+ assert type(new_data_iterator) is list and len(new_data_iterator) == vpp
+ for data_iterator in new_data_iterator:
+ assert data_iterator is None
+ else:
+ assert new_data_iterator is None
+
+ finally:
+ Utils.destroy_model_parallel()
+ unset_global_variables()
diff --git a/tests/unit_tests/test_utils.py b/tests/unit_tests/test_utils.py
index dc554612811..95756101e74 100644
--- a/tests/unit_tests/test_utils.py
+++ b/tests/unit_tests/test_utils.py
@@ -4,9 +4,9 @@
import time
import urllib.request as req
from types import SimpleNamespace
+from unittest import mock
from unittest.mock import patch
-import mock
import numpy as np
import pytest
import torch
diff --git a/tests/unit_tests/transformer/experimental_attention_variant/test_absorbed_mla.py b/tests/unit_tests/transformer/experimental_attention_variant/test_absorbed_mla.py
index eb235501ad7..89061ad4219 100644
--- a/tests/unit_tests/transformer/experimental_attention_variant/test_absorbed_mla.py
+++ b/tests/unit_tests/transformer/experimental_attention_variant/test_absorbed_mla.py
@@ -121,7 +121,10 @@ def _forward_thd(self, q, k, v, packed_seq_params):
def get_mock_mla_config(
- tensor_model_parallel_size: int, context_parallel_size: int
+ tensor_model_parallel_size: int,
+ context_parallel_size: int,
+ sequence_parallel: bool,
+ recompute_mla_up_proj: bool,
) -> MLATransformerConfig:
"""Create test config with all attributes used in MLA."""
return MLATransformerConfig(
@@ -141,7 +144,7 @@ def get_mock_mla_config(
layernorm_zero_centered_gamma=False,
expert_model_parallel_size=1,
tensor_model_parallel_size=tensor_model_parallel_size,
- sequence_parallel=tensor_model_parallel_size > 1,
+ sequence_parallel=tensor_model_parallel_size > 1 and sequence_parallel,
context_parallel_size=context_parallel_size,
apply_rope_fusion=False,
rope_type="yarn",
@@ -153,7 +156,8 @@ def get_mock_mla_config(
beta_fast=32,
beta_slow=1,
rotary_interleaved=False,
- recompute_granularity=None,
+ recompute_granularity="selective" if recompute_mla_up_proj else None,
+ recompute_modules=["mla_up_proj"] if recompute_mla_up_proj else [],
fine_grained_activation_offloading=False,
gradient_accumulation_fusion=False,
fp8=False,
@@ -227,19 +231,35 @@ def get_mla_submodules(
)
-@pytest.mark.parametrize("tp_cp", [[1, 1], [2, 1], [1, 2], [2, 2]])
+# TODO: Consider using get_gpt_layer_with_transformer_engine_spec from
+# megatron.core.models.gpt.gpt_layer_specs to simplify submodule setup and cover real specs.
+# TODO: Add test case to cover TP > 1 but SP = False.
+
+
+@pytest.mark.parametrize("tp_cp_sp", [[1, 1, False], [2, 1, True], [1, 2, False], [2, 2, True]])
@pytest.mark.parametrize("qkv_format", ['sbhd', 'thd'])
@pytest.mark.parametrize("down_proj_use_column_parallel", [False, True])
-def test_functionality(tp_cp: List[int], qkv_format: str, down_proj_use_column_parallel: bool):
+@pytest.mark.parametrize("recompute_mla_up_proj", [False, True])
+def test_functionality(
+ tp_cp_sp: List,
+ qkv_format: str,
+ down_proj_use_column_parallel: bool,
+ recompute_mla_up_proj: bool,
+):
"""Test that AbsorbedMLASelfAttention is equivalent to standard MLA."""
- tp_size, cp_size = tp_cp
+ tp_size, cp_size, sp = tp_cp_sp
Utils.initialize_model_parallel(
tensor_model_parallel_size=tp_size, context_parallel_size=cp_size
)
model_parallel_cuda_manual_seed(123)
# Create model
- config = get_mock_mla_config(tensor_model_parallel_size=tp_size, context_parallel_size=cp_size)
+ config = get_mock_mla_config(
+ tensor_model_parallel_size=tp_size,
+ context_parallel_size=cp_size,
+ sequence_parallel=sp,
+ recompute_mla_up_proj=recompute_mla_up_proj,
+ )
absorbed_submodules = get_absorbed_mla_submodules(
down_proj_use_column_parallel=down_proj_use_column_parallel,
qk_layernorm=True,
@@ -295,13 +315,15 @@ def test_functionality(tp_cp: List[int], qkv_format: str, down_proj_use_column_p
qkv_format='thd',
)
hidden_states = torch.randn(
- (total_tokens // tp_size // cp_size, 1, config.hidden_size),
+ (total_tokens // cp_size // (tp_size if sp else 1), 1, config.hidden_size),
dtype=torch.bfloat16,
device='cuda',
)
grads = torch.randn_like(hidden_states)
else:
- seqlen = 1024 // tp_size // cp_size
+ # When SP is enabled, sequence is sharded across TP ranks
+ # When SP is disabled, each TP rank has the full sequence
+ seqlen = 1024 // cp_size // (tp_size if sp else 1)
hidden_states = torch.randn((seqlen, 3, 7168), dtype=torch.bfloat16, device='cuda')
grads = torch.randn_like(hidden_states)
packed_seq_params = None
diff --git a/tests/unit_tests/transformer/moe/test_token_dispatcher.py b/tests/unit_tests/transformer/moe/test_token_dispatcher.py
index 6ff8fcdc6e5..91f32719d07 100644
--- a/tests/unit_tests/transformer/moe/test_token_dispatcher.py
+++ b/tests/unit_tests/transformer/moe/test_token_dispatcher.py
@@ -1,7 +1,8 @@
-# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
import copy
import dataclasses
+from types import SimpleNamespace
import pytest
import torch
@@ -10,6 +11,7 @@
from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_local_submodules
from megatron.core.transformer.moe.moe_layer import MoELayer
from megatron.core.transformer.moe.moe_utils import get_capacity
+from megatron.core.transformer.moe.token_dispatcher import MoETokenDispatcher
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.typed_torch import apply_module
from megatron.core.utils import is_te_min_version
@@ -33,6 +35,48 @@ def token_unpermutation(token_dispatcher, hidden_states):
return hidden_states, None
+class _NestedAttrTestDispatcher(MoETokenDispatcher):
+ def dispatch_preprocess(self, tokens, routing_map, probs):
+ raise NotImplementedError
+
+ def token_dispatch(self, hidden_states, probs):
+ raise NotImplementedError
+
+ def dispatch_postprocess(self, hidden_states, probs):
+ raise NotImplementedError
+
+ def combine_preprocess(self, hidden_states):
+ raise NotImplementedError
+
+ def token_combine(self, hidden_states):
+ raise NotImplementedError
+
+ def combine_postprocess(self, hidden_states):
+ raise NotImplementedError
+
+
+def test_get_cudagraph_attr_supports_nested_paths():
+ dispatcher = object.__new__(_NestedAttrTestDispatcher)
+ token_probs = torch.randn(2, 3)
+ dispatcher._comm_manager = SimpleNamespace(
+ token_probs=token_probs, nested=SimpleNamespace(routing_map=torch.randn(2, 4))
+ )
+
+ assert dispatcher.get_cudagraph_attr("_comm_manager.token_probs") is token_probs
+ assert dispatcher.get_cudagraph_attr("_comm_manager.nested.routing_map") is not None
+ assert dispatcher.get_cudagraph_attr("_comm_manager.missing_attr") is None
+
+
+def test_set_cudagraph_attr_supports_nested_paths():
+ dispatcher = object.__new__(_NestedAttrTestDispatcher)
+ dispatcher._comm_manager = SimpleNamespace(routing_map=None)
+ routing_map = torch.randn(4, 5)
+
+ dispatcher.set_cudagraph_attr("_comm_manager.routing_map", routing_map)
+
+ assert dispatcher._comm_manager.routing_map is routing_map
+
+
class MoEModelTestContainer:
def __init__(
self,
diff --git a/tests/unit_tests/transformer/test_attention.py b/tests/unit_tests/transformer/test_attention.py
index 15d95128bac..55a4b8a4864 100644
--- a/tests/unit_tests/transformer/test_attention.py
+++ b/tests/unit_tests/transformer/test_attention.py
@@ -1,11 +1,13 @@
-# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
import copy
from unittest import mock
+import einops
import pytest
import torch
from packaging import version
+from torch.nn import functional as F
import megatron.core.parallel_state as parallel_state
from megatron.core.hyper_comm_grid import HyperCommGrid
@@ -13,6 +15,7 @@
get_pos_emb_on_this_cp_rank as get_tensor_on_this_cp_rank,
)
from megatron.core.models.gpt.gpt_layer_specs import (
+ get_gpt_layer_local_spec,
get_gpt_layer_with_transformer_engine_spec,
get_gpt_layer_with_transformer_engine_submodules,
)
@@ -21,6 +24,10 @@
from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed
from megatron.core.transformer import TransformerConfig
from megatron.core.transformer.attention import SelfAttention
+from megatron.core.transformer.dot_product_attention_context_parallel import (
+ AttentionFuncionWithContextParallel,
+ to_zz_mask_attn_bias,
+)
from megatron.core.transformer.enums import AttnMaskType
from megatron.core.utils import is_te_min_version
from megatron.training.arguments import parse_args
@@ -34,6 +41,7 @@
init_checkpointing_mock_args,
)
from tests.unit_tests.test_utilities import Utils
+from tests.unit_tests.transformer.test_multi_latent_attention import make_test_packed_seq_params
try:
from transformer_engine.pytorch.attention.rope import apply_fused_qkv_rotary_pos_emb
@@ -44,10 +52,19 @@
@pytest.mark.parametrize("output_gate", [False, True])
+@pytest.mark.parametrize(
+ ("transformer_impl", "fallback_to_eager_attn"),
+ [("transformer_engine", False), ("transformer_engine", True), ("native", False)],
+)
class TestParallelAttention:
@pytest.fixture(scope='function', autouse=True)
- def setup_method(self, output_gate):
+ def setup_method(self, output_gate, transformer_impl, fallback_to_eager_attn):
+ if output_gate:
+ if transformer_impl == "native":
+ pytest.skip("Native implementation does not support output gate.")
+ if fallback_to_eager_attn:
+ pytest.skip("No need to test output gate for fallback_to_eager_attn = True.")
Utils.initialize_model_parallel(1, 1)
model_parallel_cuda_manual_seed(123)
self.transformer_config = TransformerConfig(
@@ -58,11 +75,17 @@ def setup_method(self, output_gate):
bf16=True,
params_dtype=torch.bfloat16,
attention_output_gate=output_gate,
+ transformer_impl=transformer_impl,
+ fallback_to_eager_attn=fallback_to_eager_attn,
)
+ if transformer_impl == "transformer_engine":
+ attn_layer_spec = (
+ get_gpt_layer_with_transformer_engine_submodules().self_attention.submodules
+ )
+ else:
+ attn_layer_spec = get_gpt_layer_local_spec().submodules.self_attention.submodules
self.parallel_attention = SelfAttention(
- self.transformer_config,
- get_gpt_layer_with_transformer_engine_submodules().self_attention.submodules,
- layer_number=1,
+ self.transformer_config, attn_layer_spec, layer_number=1
)
def teardown_method(self):
@@ -73,10 +96,19 @@ def test_constructor(self):
assert self.parallel_attention.layer_number == 1
num_weights = sum([p.numel() for p in self.parallel_attention.parameters()])
+
+ hidden_size = self.transformer_config.hidden_size
+ standard_num_weights = (
+ hidden_size * hidden_size * 4 + hidden_size * 4 # QKVO weight # QKVO bias
+ )
if self.transformer_config.attention_output_gate:
- assert num_weights == 82816
- else:
- assert num_weights == 66304
+ standard_num_weights += hidden_size * hidden_size + hidden_size # Gate weight and bias
+ if self.transformer_config.transformer_impl == "transformer_engine":
+ standard_num_weights += hidden_size * 2 # fused pre layernorm weight and bias
+
+ assert (
+ num_weights == standard_num_weights
+ ), f"{num_weights=} does not match {standard_num_weights=}."
def test_cpu_forward(self):
# we can't currently do this because the global memory buffer is on GPU
@@ -111,6 +143,8 @@ def test_gpu_forward(self):
@pytest.mark.parametrize("rotary_interleaved", [True, False])
@pytest.mark.parametrize("fused_qkv_rope", [True, False])
def test_fused_rope_gpu_forward(self, rotary_interleaved, fused_qkv_rope):
+ if self.transformer_config.fallback_to_eager_attn:
+ pytest.skip("No need to test fused RoPE for fallback_to_eager_attn = True.")
self.parallel_attention.config.apply_rope_fusion = True
if rotary_interleaved and not is_te_min_version("2.3.0"):
pytest.skip("Only TE >= 2.3.0 supports interleaved fused RoPE.")
@@ -374,6 +408,199 @@ class TestSelfAttention:
@pytest.fixture(scope='function', autouse=True)
def setup_method(self, output_gate):
+ self.output_gate = output_gate
+ Utils.initialize_model_parallel(1, 1)
+ model_parallel_cuda_manual_seed(123)
+
+ def teardown_method(self):
+ Utils.destroy_model_parallel()
+
+ def test_clip_qk_disabled_raises_error(self):
+ """Test that clip_qk raises ValueError when qk_clip is not enabled."""
+ transformer_config = TransformerConfig(
+ num_layers=2,
+ hidden_size=128,
+ num_attention_heads=4,
+ use_cpu_initialization=True,
+ qk_clip=False,
+ )
+ attention = SelfAttention(
+ transformer_config,
+ get_gpt_layer_with_transformer_engine_spec().submodules.self_attention.submodules,
+ layer_number=1,
+ )
+
+ with pytest.raises(ValueError, match="qk_clip option needs to be enabled"):
+ attention.clip_qk()
+
+ def test_clip_qk_none_logits_raises_error(self):
+ """Test that clip_qk raises ValueError when current_max_attn_logits is None."""
+ transformer_config = TransformerConfig(
+ num_layers=2,
+ hidden_size=128,
+ num_attention_heads=4,
+ use_cpu_initialization=True,
+ qk_clip=True,
+ qk_clip_threshold=100.0,
+ qk_clip_alpha=0.5,
+ )
+ attention = SelfAttention(
+ transformer_config,
+ get_gpt_layer_with_transformer_engine_spec().submodules.self_attention.submodules,
+ layer_number=1,
+ )
+
+ with pytest.raises(ValueError, match="current_max_attn_logits is None"):
+ attention.clip_qk()
+
+ def test_clip_qk_below_threshold_no_update(self):
+ """Test that weights are not updated when max logits are below threshold."""
+ transformer_config = TransformerConfig(
+ num_layers=2,
+ hidden_size=128,
+ num_attention_heads=4,
+ use_cpu_initialization=True,
+ qk_clip=True,
+ qk_clip_threshold=100.0,
+ qk_clip_alpha=0.5,
+ )
+ attention = SelfAttention(
+ transformer_config,
+ get_gpt_layer_with_transformer_engine_spec().submodules.self_attention.submodules,
+ layer_number=1,
+ )
+ attention.cuda()
+
+ # Save original weights
+ original_weight = attention.linear_qkv.weight.data.clone()
+
+ # Set current_max_attn_logits below threshold
+ attention.core_attention.current_max_attn_logits = torch.tensor(
+ [50.0, 60.0, 70.0, 80.0], device='cuda'
+ )
+
+ # Call clip_qk
+ attention.clip_qk()
+
+ # Weights should not be updated
+ assert torch.equal(attention.linear_qkv.weight.data, original_weight)
+ # current_max_attn_logits should be reset
+ assert attention.core_attention.current_max_attn_logits is None
+
+ def test_clip_qk_above_threshold_updates_weights(self):
+ """Test that weights are updated when max logits exceed threshold."""
+ transformer_config = TransformerConfig(
+ num_layers=2,
+ hidden_size=128,
+ num_attention_heads=4,
+ use_cpu_initialization=True,
+ qk_clip=True,
+ qk_clip_threshold=100.0,
+ qk_clip_alpha=0.5,
+ )
+ attention = SelfAttention(
+ transformer_config,
+ get_gpt_layer_with_transformer_engine_spec().submodules.self_attention.submodules,
+ layer_number=1,
+ )
+ attention.cuda()
+
+ # Save original weights
+ original_weight = attention.linear_qkv.weight.data.clone()
+
+ # Set current_max_attn_logits above threshold
+ attention.core_attention.current_max_attn_logits = torch.tensor(
+ [150.0, 160.0, 170.0, 180.0], device='cuda'
+ )
+
+ # Call clip_qk
+ attention.clip_qk()
+
+ # Weights should be updated
+ assert not torch.equal(attention.linear_qkv.weight.data, original_weight)
+ # current_max_attn_logits should be reset
+ assert attention.core_attention.current_max_attn_logits is None
+
+ def test_clip_qk_gqa_configuration(self):
+ """Test clip_qk with GQA (Grouped Query Attention) configuration."""
+ transformer_config = TransformerConfig(
+ num_layers=2,
+ hidden_size=128,
+ num_attention_heads=8,
+ num_query_groups=4, # GQA with 2 heads per group
+ use_cpu_initialization=True,
+ qk_clip=True,
+ qk_clip_threshold=100.0,
+ qk_clip_alpha=0.5,
+ )
+ attention = SelfAttention(
+ transformer_config,
+ get_gpt_layer_with_transformer_engine_spec().submodules.self_attention.submodules,
+ layer_number=1,
+ )
+ attention.cuda()
+
+ # Save original weights
+ original_weight = attention.linear_qkv.weight.data.clone()
+
+ # Set current_max_attn_logits for all heads (8 heads)
+ attention.core_attention.current_max_attn_logits = torch.tensor(
+ [150.0, 160.0, 170.0, 180.0, 190.0, 200.0, 210.0, 220.0], device='cuda'
+ )
+
+ # Call clip_qk
+ attention.clip_qk()
+
+ # Weights should be updated
+ assert not torch.equal(attention.linear_qkv.weight.data, original_weight)
+ # current_max_attn_logits should be reset
+ assert attention.core_attention.current_max_attn_logits is None
+
+ def test_clip_qk_mixed_logits(self):
+ """Test clip_qk with mixed logits (some above, some below threshold)."""
+ transformer_config = TransformerConfig(
+ num_layers=2,
+ hidden_size=128,
+ num_attention_heads=4,
+ use_cpu_initialization=True,
+ qk_clip=True,
+ qk_clip_threshold=100.0,
+ qk_clip_alpha=0.5,
+ )
+ attention = SelfAttention(
+ transformer_config,
+ get_gpt_layer_with_transformer_engine_spec().submodules.self_attention.submodules,
+ layer_number=1,
+ )
+ attention.cuda()
+
+ # Save original weights
+ original_weight = attention.linear_qkv.weight.data.clone()
+
+ # Set mixed current_max_attn_logits (some above, some below threshold)
+ attention.core_attention.current_max_attn_logits = torch.tensor(
+ [80.0, 150.0, 90.0, 200.0], device='cuda'
+ )
+
+ # Call clip_qk
+ attention.clip_qk()
+
+ # Weights should be updated since at least one head exceeds threshold
+ assert not torch.equal(attention.linear_qkv.weight.data, original_weight)
+ # current_max_attn_logits should be reset
+ assert attention.core_attention.current_max_attn_logits is None
+
+
+@pytest.mark.parametrize("output_gate", [False, True])
+@pytest.mark.parametrize("transformer_impl", ["transformer_engine", "native"])
+class TestSelfAttentionTransformerImpl:
+
+ @pytest.fixture(scope='function', autouse=True)
+ def setup_method(self, output_gate, transformer_impl):
+ if transformer_impl == "native":
+ if output_gate:
+ pytest.skip("Native implementation does not support output gate.")
+ self.transformer_impl = transformer_impl
self.output_gate = output_gate
Utils.destroy_model_parallel()
@@ -389,10 +616,17 @@ def run_self_attention(self, pg_collection):
attention_output_gate=self.output_gate,
tensor_model_parallel_size=tensor_model_parallel_size,
use_cpu_initialization=False,
+ transformer_impl=self.transformer_impl,
)
+ if self.transformer_impl == "transformer_engine":
+ attn_layer_spec = (
+ get_gpt_layer_with_transformer_engine_submodules().self_attention.submodules
+ )
+ else:
+ attn_layer_spec = get_gpt_layer_local_spec().submodules.self_attention.submodules
self.self_attention = SelfAttention(
self.transformer_config,
- get_gpt_layer_with_transformer_engine_submodules().self_attention.submodules,
+ attn_layer_spec,
layer_number=1,
attn_mask_type=AttnMaskType.causal,
pg_collection=pg_collection,
@@ -479,6 +713,7 @@ def _test_parallel_attention_correctness(
seed=123,
sequence_length=256,
micro_batch_size=4,
+ sequence_packing=False,
):
# Model initialization function
def initialize_gpt_model(
@@ -572,17 +807,24 @@ def initialize_gpt_model(
def get_tensor_on_this_rank(tensor):
if cp > 1:
tensor = get_tensor_on_this_cp_rank(tensor, 0, cp_group)
+ if sequence_packing:
+ tensor = tensor.transpose(0, 1).contiguous().view(-1, 1, *tensor.shape[2:])
if tp > 1 and sp:
- sp_seg = sequence_length // tp // cp
+ sp_seg = tensor.shape[0] // tp
tensor = tensor[tp_rank * sp_seg : (tp_rank + 1) * sp_seg]
return tensor
# Calculate parallel model output
+ if sequence_packing:
+ cu_seqlens = [i * sequence_length for i in range(micro_batch_size + 1)]
+ packed_seq_params = make_test_packed_seq_params(cu_seqlens=cu_seqlens)
+ else:
+ packed_seq_params = None
input_hidden_states = get_tensor_on_this_rank(input_hidden_states)
input_hidden_states = input_hidden_states.detach().requires_grad_(True)
parallel_attention = gpt_model[0].decoder.layers[0].self_attention
output_hidden_states_parallel, bias_hidden_states_parallel = parallel_attention(
- input_hidden_states, attention_mask=None
+ input_hidden_states, attention_mask=None, packed_seq_params=packed_seq_params
)
output_hidden_states_parallel.sum().backward()
input_grad_parallel = input_hidden_states.grad.detach()
@@ -647,6 +889,8 @@ def get_tensor_on_this_rank(tensor):
Utils.destroy_model_parallel()
+# TODO(yuzhongw): Add test case for fallback_to_eager_attn
+@pytest.mark.parametrize("sequence_packing", [False, True])
@pytest.mark.parametrize("apply_rope_fusion", [False, True])
@pytest.mark.parametrize(
("tp", "sp", "cp"),
@@ -661,7 +905,7 @@ def get_tensor_on_this_rank(tensor):
@pytest.mark.parametrize("qk_layernorm", [False, True])
@pytest.mark.parametrize("output_gate", [False, True])
def test_parallel_attention_correctness(
- tmp_path_dist_ckpt, apply_rope_fusion, tp, sp, cp, qk_layernorm, output_gate
+ tmp_path_dist_ckpt, sequence_packing, apply_rope_fusion, tp, sp, cp, qk_layernorm, output_gate
):
transformer_config = TransformerConfig(
num_layers=1,
@@ -690,6 +934,7 @@ def test_parallel_attention_correctness(
cp=cp,
seed=123,
sequence_length=256,
+ sequence_packing=sequence_packing,
)
@@ -724,3 +969,133 @@ def test_parallel_attention_correctness_num_query_groups_less_than_tp_size(
seed=123,
sequence_length=256,
)
+
+
+def _torch_native_attention(query, key, value, attention_mask, sinks, scaling: float):
+ """Torch native attention implementation
+    This was not in the original implementation and slightly affects results:
+    to prevent overflow in BF16/FP16 when training with batch size > 1, max values are clamped.
+ """
+ # Rearrange query, key, value to (b, h, s, d)
+ query = einops.rearrange(query, 's b h d -> b h s d')
+ key = einops.rearrange(key, 's b h d -> b h s d')
+ value = einops.rearrange(value, 's b h d -> b h s d')
+
+ # Compute attention weights
+ attn_weights = torch.matmul(query, key.transpose(2, 3)) * scaling
+ if attention_mask is not None:
+ nheads = query.shape[1]
+ nheads_k = key.shape[1]
+ heads_k_stride = 1
+ mask_bias = to_zz_mask_attn_bias(
+ attention_mask, 1, nheads, nheads_k, heads_k_stride, query.device, query.dtype
+ )
+ attn_weights = attn_weights + mask_bias
+
+ # Add sinks to attention weights
+ if sinks is None:
+ combined_logits = attn_weights
+ else:
+ sinks = sinks.reshape(1, -1, 1, 1).expand(query.shape[0], -1, query.shape[-2], -1)
+ combined_logits = torch.cat([attn_weights, sinks], dim=-1)
+
+ # Compute attention scores
+ probs = F.softmax(combined_logits, dim=-1, dtype=combined_logits.dtype)
+ if sinks is None:
+ scores = probs
+ else:
+ scores = probs[..., :-1]
+
+ # Compute attention output
+ attn_output = torch.matmul(scores, value)
+ attn_output = einops.rearrange(attn_output, 'b h s d -> s b h d')
+ attn_output = attn_output.contiguous()
+ return attn_output
+
+
+def test_eager_attention_function_correctness():
+ """Test the correctness of the context parallel eager attention function"""
+
+ # Configuration
+ batch_size = 4
+ num_heads = 2
+ head_dim = 256
+ seq_len_q = 512
+ seq_len_k = 2048
+ scale = 1 / (head_dim**2)
+
+ # Initialize inputs
+ q = torch.rand(
+ (seq_len_q, batch_size, num_heads, head_dim),
+ device='cuda',
+ dtype=torch.bfloat16,
+ requires_grad=True,
+ )
+ k = torch.rand(
+ (seq_len_k, batch_size, num_heads, head_dim),
+ device='cuda',
+ dtype=torch.bfloat16,
+ requires_grad=True,
+ )
+ v = torch.rand(
+ (seq_len_k, batch_size, num_heads, head_dim),
+ device='cuda',
+ dtype=torch.bfloat16,
+ requires_grad=True,
+ )
+
+ def randbool(shape, **kwargs):
+ return torch.randn(shape, **kwargs) > 0
+
+ attn_bias = randbool((batch_size, 1, seq_len_q, seq_len_k), device='cuda')
+ sinks = None
+
+ # Torch native attention forward and backward pass
+ out_torch = _torch_native_attention(
+ query=q, key=k, value=v, attention_mask=attn_bias, sinks=sinks, scaling=scale
+ )
+ loss_torch = out_torch.sum()
+ loss_torch.backward()
+ torch_q_grad = q.grad.clone()
+ torch_k_grad = k.grad.clone()
+ torch_v_grad = v.grad.clone()
+ q.grad.zero_()
+ k.grad.zero_()
+ v.grad.zero_()
+ if sinks is not None:
+ torch_sinks_grad = sinks.grad.clone()
+ sinks.grad.zero_()
+ else:
+ torch_sinks_grad = None
+
+ # Custom attention forward and backward pass
+ out_custom = AttentionFuncionWithContextParallel.apply(
+ q, k, v, attn_bias, 0.0, scale, None # dropout
+ )
+ loss_custom = out_custom.sum()
+ loss_custom.backward()
+ custom_q_grad = q.grad.clone()
+ custom_k_grad = k.grad.clone()
+ custom_v_grad = v.grad.clone()
+ q.grad.zero_()
+ k.grad.zero_()
+ v.grad.zero_()
+ if sinks is not None:
+ custom_sinks_grad = sinks.grad.clone()
+ sinks.grad.zero_()
+ else:
+ custom_sinks_grad = None
+
+ # Check attention output and gradients
+ assert torch.equal(out_custom, out_torch), "Mismatch in attention output"
+ tol = {"atol": 1e-4, "rtol": 1e-4}
+ for tensor_name, tensor_torch, tensor_custom in [
+ ("q_grad", torch_q_grad, custom_q_grad),
+ ("k_grad", torch_k_grad, custom_k_grad),
+ ("v_grad", torch_v_grad, custom_v_grad),
+ ("sinks_grad", torch_sinks_grad, custom_sinks_grad),
+ ]:
+ if (tensor_torch is not None) and (tensor_custom is not None):
+ torch.testing.assert_close(
+                tensor_custom, tensor_torch, **tol, msg=lambda msg: f"Mismatch in {tensor_name}: {msg}"
+ )
diff --git a/tests/unit_tests/transformer/test_hyper_connection_recompute.py b/tests/unit_tests/transformer/test_hyper_connection_recompute.py
new file mode 100644
index 00000000000..cf44f2d7cd0
--- /dev/null
+++ b/tests/unit_tests/transformer/test_hyper_connection_recompute.py
@@ -0,0 +1,408 @@
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+"""
+Unit tests for HyperConnection block-level recomputation.
+
+Tests the following functionality:
+1. HyperConnectionModule._forward_with_checkpoint correctness
+2. HyperConnectionModule.apply_h_post with CheckpointManager
+3. Multiple HyperConnectionModules chained with a single CheckpointManager
+4. Partial checkpoint (last layer not checkpointed)
+5. TransformerConfig 'mhc' in recompute_modules option
+"""
+
+import pytest
+import torch
+
+from megatron.core.tensor_parallel.random import CheckpointManager, model_parallel_cuda_manual_seed
+from megatron.core.transformer.hyper_connection import HyperConnectionModule
+from megatron.core.transformer.transformer_config import TransformerConfig
+from tests.unit_tests.test_utilities import Utils
+
+
+class TestHyperConnectionCheckpoint:
+    """Test HyperConnectionModule checkpoint functionality."""
+
+    def setup_method(self, method):
+        # Fresh 1x1 model-parallel state and deterministic parallel RNG for
+        # every test so checkpoint/no-checkpoint runs are comparable.
+        Utils.initialize_model_parallel(1, 1)
+        model_parallel_cuda_manual_seed(123)
+
+    def teardown_method(self, method):
+        Utils.destroy_model_parallel()
+
+    def _create_hyper_connection_module(self, hidden_size=64, num_residual_streams=4):
+        """Create a HyperConnectionModule for testing."""
+        config = TransformerConfig(
+            num_layers=2,
+            hidden_size=hidden_size,
+            num_attention_heads=4,
+            use_cpu_initialization=True,
+            enable_hyper_connections=True,
+            num_residual_streams=num_residual_streams,
+            mhc_sinkhorn_iterations=5,  # Fewer iterations for faster tests
+            mhc_init_gating_factor=0.01,
+        )
+        module = HyperConnectionModule(config=config, layer_number=1)
+        module.cuda()
+        return module
+
+    def test_forward_normal_vs_checkpoint_correctness(self):
+        """
+        Test that _forward_with_checkpoint produces the same outputs as _forward_normal.
+        """
+        hidden_size = 64
+        num_streams = 4
+        seq_len = 8
+        batch_size = 2
+
+        module = self._create_hyper_connection_module(hidden_size, num_streams)
+
+        # Create input tensors in the flattened residual-stream layout [s, b, n*C]
+        hidden_states = torch.randn(
+            seq_len, batch_size, num_streams * hidden_size, device='cuda', requires_grad=True
+        )
+        residual = torch.randn(
+            seq_len, batch_size, num_streams * hidden_size, device='cuda', requires_grad=True
+        )
+
+        # Clone inputs for comparison
+        hidden_states_ckpt = hidden_states.detach().clone().requires_grad_(True)
+        residual_ckpt = residual.detach().clone().requires_grad_(True)
+
+        # Forward without checkpoint (reference); seed so both paths see the
+        # same RNG stream.
+        torch.manual_seed(42)
+        torch.cuda.manual_seed(42)
+        aggregated_ref, h_res_ref, h_post_ref = module._forward_normal(hidden_states)
+        mixed_ref = module.apply_h_res(h_res_ref, residual)
+        loss_ref = aggregated_ref.sum() + mixed_ref.sum() + h_post_ref.sum()
+        loss_ref.backward()
+        grad_hidden_ref = hidden_states.grad.clone()
+        grad_residual_ref = residual.grad.clone()
+
+        # Forward with checkpoint
+        torch.manual_seed(42)
+        torch.cuda.manual_seed(42)
+        manager = CheckpointManager()
+        aggregated_ckpt, h_res_ckpt, h_post_ckpt = module._forward_with_checkpoint(
+            hidden_states_ckpt, manager
+        )
+        mixed_ckpt = module.apply_h_res(h_res_ckpt, residual_ckpt)
+        # Calculate loss before discarding outputs
+        loss_ckpt = aggregated_ckpt.sum() + mixed_ckpt.sum() + h_post_ckpt.sum()
+
+        # Register unified recompute hook
+        manager.discard_all_outputs_and_register_unified_recompute(loss_ckpt)
+
+        # Backward pass
+        loss_ckpt.backward()
+        grad_hidden_ckpt = hidden_states_ckpt.grad.clone()
+        grad_residual_ckpt = residual_ckpt.grad.clone()
+
+        # Verify gradients match
+        assert torch.allclose(grad_hidden_ckpt, grad_hidden_ref, atol=1e-5), (
+            f"Hidden states gradients mismatch:\n"
+            f"Checkpoint: {grad_hidden_ckpt}\n"
+            f"Reference: {grad_hidden_ref}"
+        )
+        assert torch.allclose(grad_residual_ckpt, grad_residual_ref, atol=1e-5), (
+            f"Residual gradients mismatch:\n"
+            f"Checkpoint: {grad_residual_ckpt}\n"
+            f"Reference: {grad_residual_ref}"
+        )
+
+    def test_apply_h_post_with_checkpoint(self):
+        """
+        Test that apply_h_post with manager produces correct gradients.
+        """
+        hidden_size = 64
+        num_streams = 4
+        seq_len = 8
+        batch_size = 2
+
+        module = self._create_hyper_connection_module(hidden_size, num_streams)
+
+        # Create input tensors; bias intentionally has no grad requirement.
+        x = torch.randn(seq_len, batch_size, hidden_size, device='cuda', requires_grad=True)
+        bias = torch.randn(hidden_size, device='cuda')
+        h_post = torch.randn(seq_len, batch_size, num_streams, device='cuda', requires_grad=True)
+
+        # Clone inputs
+        x_ckpt = x.detach().clone().requires_grad_(True)
+        h_post_ckpt = h_post.detach().clone().requires_grad_(True)
+
+        # Reference: without checkpoint (manager=None)
+        torch.manual_seed(42)
+        x_out_ref, bias_out_ref = module.apply_h_post((x, bias), h_post, manager=None)
+        loss_ref = x_out_ref.sum()
+        if bias_out_ref is not None:
+            loss_ref = loss_ref + bias_out_ref.sum()
+        loss_ref.backward()
+        grad_x_ref = x.grad.clone()
+        grad_h_post_ref = h_post.grad.clone()
+
+        # With checkpoint (manager provided)
+        torch.manual_seed(42)
+        manager = CheckpointManager()
+        x_out_ckpt, bias_out_ckpt = module.apply_h_post(
+            (x_ckpt, bias), h_post_ckpt, manager=manager
+        )
+        loss_ckpt = x_out_ckpt.sum()
+        if bias_out_ckpt is not None:
+            loss_ckpt = loss_ckpt + bias_out_ckpt.sum()
+
+        manager.discard_all_outputs_and_register_unified_recompute(loss_ckpt)
+        loss_ckpt.backward()
+        grad_x_ckpt = x_ckpt.grad.clone()
+        grad_h_post_ckpt = h_post_ckpt.grad.clone()
+
+        # Verify gradients
+        assert torch.allclose(grad_x_ckpt, grad_x_ref, atol=1e-5)
+        assert torch.allclose(grad_h_post_ckpt, grad_h_post_ref, atol=1e-5)
+
+    def test_forward_with_manager_parameter(self):
+        """
+        Test forward() method with mhc_recompute_manager parameter.
+        """
+        hidden_size = 64
+        num_streams = 4
+        seq_len = 8
+        batch_size = 2
+
+        module = self._create_hyper_connection_module(hidden_size, num_streams)
+
+        # Create input tensors
+        hidden_states = torch.randn(
+            seq_len, batch_size, num_streams * hidden_size, device='cuda', requires_grad=True
+        )
+
+        # Clone inputs
+        hidden_states_ckpt = hidden_states.detach().clone().requires_grad_(True)
+
+        # Reference: forward without manager (uses _forward_normal)
+        torch.manual_seed(42)
+        torch.cuda.manual_seed(42)
+        aggregated_ref, h_res_ref, h_post_ref = module.forward(
+            hidden_states, mhc_recompute_manager=None
+        )
+        loss_ref = aggregated_ref.sum() + h_res_ref.sum() + h_post_ref.sum()
+        loss_ref.backward()
+        grad_hidden_ref = hidden_states.grad.clone()
+
+        # With manager (uses _forward_with_checkpoint)
+        torch.manual_seed(42)
+        torch.cuda.manual_seed(42)
+        manager = CheckpointManager()
+        aggregated_ckpt, h_res_ckpt, h_post_ckpt = module.forward(
+            hidden_states_ckpt, mhc_recompute_manager=manager
+        )
+        loss_ckpt = aggregated_ckpt.sum() + h_res_ckpt.sum() + h_post_ckpt.sum()
+
+        manager.discard_all_outputs_and_register_unified_recompute(loss_ckpt)
+        loss_ckpt.backward()
+        grad_hidden_ckpt = hidden_states_ckpt.grad.clone()
+
+        # Verify gradients match
+        assert torch.allclose(grad_hidden_ckpt, grad_hidden_ref, atol=1e-5)
+
+
+class TestMHCBlockRecomputeIntegration:
+    """Test CheckpointManager integration with HyperConnection."""
+
+    def setup_method(self, method):
+        # Deterministic parallel RNG so reference and checkpointed passes
+        # can be seeded identically within each test.
+        Utils.initialize_model_parallel(1, 1)
+        model_parallel_cuda_manual_seed(123)
+
+    def teardown_method(self, method):
+        Utils.destroy_model_parallel()
+
+    def test_multiple_hyper_connections_in_chain(self):
+        """
+        Test that multiple HyperConnectionModules can be chained together
+        with a single CheckpointManager.
+        """
+        hidden_size = 64
+        num_streams = 4
+        seq_len = 8
+        batch_size = 2
+        n_channels = num_streams * hidden_size
+
+        # Create multiple HyperConnection modules (simulating multiple layers)
+        config = TransformerConfig(
+            num_layers=4,
+            hidden_size=hidden_size,
+            num_attention_heads=4,
+            use_cpu_initialization=True,
+            enable_hyper_connections=True,
+            num_residual_streams=num_streams,
+            mhc_sinkhorn_iterations=5,
+            mhc_init_gating_factor=0.01,
+        )
+
+        modules = [
+            HyperConnectionModule(config=config, layer_number=i + 1).cuda() for i in range(3)
+        ]
+
+        # Create input tensors
+        hidden_states_ref = torch.randn(
+            seq_len, batch_size, n_channels, device='cuda', requires_grad=True
+        )
+        residual_ref = torch.randn(
+            seq_len, batch_size, n_channels, device='cuda', requires_grad=True
+        )
+
+        hidden_states_ckpt = hidden_states_ref.detach().clone().requires_grad_(True)
+        residual_ckpt = residual_ref.detach().clone().requires_grad_(True)
+
+        # Reference: forward without checkpoint
+        torch.manual_seed(42)
+        torch.cuda.manual_seed(42)
+
+        h = hidden_states_ref
+        r = residual_ref
+        for module in modules:
+            agg, h_res, h_post = module.forward(h, mhc_recompute_manager=None)
+            # 0.1 scaling stands in for a layer's sublayer output.
+            agg, _ = module.apply_h_post((0.1 * agg, None), h_post, manager=None)
+            mixed = module.apply_h_res(h_res, r)  # Apply h_res to get mixed [s, b, n*C]
+            h = agg + mixed
+            r = h
+
+        loss_ref = h.sum()
+        loss_ref.backward()
+        grad_hidden_ref = hidden_states_ref.grad.clone()
+        grad_residual_ref = residual_ref.grad.clone()
+
+        # With checkpoint using single manager shared across all layers
+        torch.manual_seed(42)
+        torch.cuda.manual_seed(42)
+
+        manager = CheckpointManager()
+
+        h = hidden_states_ckpt
+        r = residual_ckpt
+        for module in modules:
+            agg, h_res, h_post = module.forward(h, mhc_recompute_manager=manager)
+            agg, _ = module.apply_h_post((0.1 * agg, None), h_post, manager=manager)
+            mixed = module.apply_h_res(h_res, r)  # Apply h_res to get mixed [s, b, n*C]
+            h = agg + mixed
+            r = h
+
+        loss_ckpt = h.sum()
+        manager.discard_all_outputs_and_register_unified_recompute(loss_ckpt)
+        loss_ckpt.backward()
+
+        grad_hidden_ckpt = hidden_states_ckpt.grad.clone()
+        grad_residual_ckpt = residual_ckpt.grad.clone()
+
+        # Verify gradients (looser atol than single-layer tests: errors
+        # accumulate across the three chained layers)
+        assert torch.allclose(
+            grad_hidden_ckpt, grad_hidden_ref, atol=1e-4
+        ), f"Chained HyperConnection hidden gradients mismatch"
+        assert torch.allclose(
+            grad_residual_ckpt, grad_residual_ref, atol=1e-4
+        ), f"Chained HyperConnection residual gradients mismatch"
+
+    def test_partial_checkpoint_last_layer_not_checkpointed(self):
+        """
+        Test that when is_last_layer_in_block=True, the final output is NOT checkpointed.
+        This simulates the TransformerBlock behavior where the last layer's MLP BDA
+        serves as the hook_tensor for unified recompute.
+        """
+        hidden_size = 64
+        num_streams = 4
+        seq_len = 8
+        batch_size = 2
+
+        config = TransformerConfig(
+            num_layers=2,
+            hidden_size=hidden_size,
+            num_attention_heads=4,
+            use_cpu_initialization=True,
+            enable_hyper_connections=True,
+            num_residual_streams=num_streams,
+            mhc_sinkhorn_iterations=5,
+            mhc_init_gating_factor=0.01,
+        )
+
+        module = HyperConnectionModule(config=config, layer_number=1).cuda()
+
+        hidden_states_ref = torch.randn(
+            seq_len, batch_size, num_streams * hidden_size, device='cuda', requires_grad=True
+        )
+        residual_ref = torch.randn(
+            seq_len, batch_size, num_streams * hidden_size, device='cuda', requires_grad=True
+        )
+
+        hidden_states_ckpt = hidden_states_ref.detach().clone().requires_grad_(True)
+        residual_ckpt = residual_ref.detach().clone().requires_grad_(True)
+
+        # Reference
+        torch.manual_seed(42)
+        torch.cuda.manual_seed(42)
+        aggregated_ref, h_res_ref, h_post_ref = module.forward(
+            hidden_states_ref, mhc_recompute_manager=None
+        )
+        aggregated_ref, _ = module.apply_h_post(
+            (0.1 * aggregated_ref, None), h_post_ref, manager=None
+        )
+        mixed_ref = module.apply_h_res(
+            h_res_ref, residual_ref
+        )  # Apply h_res to get mixed [s, b, n*C]
+        # Simulate BDA that is NOT checkpointed (last layer)
+        output_ref = aggregated_ref + 0.5 * mixed_ref
+        loss_ref = output_ref.sum()
+        loss_ref.backward()
+        grad_hidden_ref = hidden_states_ref.grad.clone()
+
+        # With manager - checkpoint everything except final output
+        torch.manual_seed(42)
+        torch.cuda.manual_seed(42)
+        manager = CheckpointManager()
+        aggregated_ckpt, h_res_ckpt, h_post_ckpt = module.forward(
+            hidden_states_ckpt, mhc_recompute_manager=manager
+        )
+
+        aggregated_ckpt, _ = module.apply_h_post(
+            (0.1 * aggregated_ckpt, None), h_post_ckpt, manager=manager
+        )
+        mixed_ckpt = module.apply_h_res(
+            h_res_ckpt, residual_ckpt
+        )  # Apply h_res to get mixed [s, b, n*C]
+        # Simulate BDA that is NOT checkpointed (last layer) - this is the hook_tensor
+        output_ckpt = aggregated_ckpt + 0.5 * mixed_ckpt
+
+        # Register unified recompute on the output (which is not checkpointed)
+        manager.discard_all_outputs_and_register_unified_recompute(output_ckpt)
+
+        loss_ckpt = output_ckpt.sum()
+        loss_ckpt.backward()
+        grad_hidden_ckpt = hidden_states_ckpt.grad.clone()
+
+        # Verify gradients match
+        assert torch.allclose(grad_hidden_ckpt, grad_hidden_ref, atol=1e-5)
+
+
+class TestTransformerConfigRecomputeMhc:
+    """Test 'mhc' in recompute_modules configuration."""
+
+    def test_config_default_value(self):
+        """Test that 'mhc' is not in recompute_modules by default."""
+        config = TransformerConfig(num_layers=2, hidden_size=64, num_attention_heads=4)
+        assert "mhc" not in config.recompute_modules
+
+    def test_config_enable_mhc_recompute(self):
+        """Test enabling 'mhc' in recompute_modules."""
+        # 'mhc' recompute is only meaningful with hyper connections enabled;
+        # selective granularity is required for module-level recompute.
+        config = TransformerConfig(
+            num_layers=2,
+            hidden_size=64,
+            num_attention_heads=4,
+            enable_hyper_connections=True,
+            num_residual_streams=4,
+            recompute_modules=["core_attn", "mhc"],
+            recompute_granularity='selective',
+        )
+        assert "mhc" in config.recompute_modules
+        assert config.enable_hyper_connections is True
+
+
+# Allow running this test module directly (outside the pytest CLI).
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/tests/unit_tests/transformer/test_mhc_block_manager.py b/tests/unit_tests/transformer/test_mhc_block_manager.py
new file mode 100644
index 00000000000..aab004d6516
--- /dev/null
+++ b/tests/unit_tests/transformer/test_mhc_block_manager.py
@@ -0,0 +1,397 @@
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+import pytest
+import torch
+
+from megatron.core.tensor_parallel.random import (
+ CheckpointManager,
+ CheckpointWithoutOutput,
+ initialize_rng_tracker,
+)
+from tests.unit_tests.test_utilities import Utils
+
+
+class TestCheckpointWithoutOutputManagerAPI:
+    """Test CheckpointWithoutOutput integration with CheckpointManager."""
+
+    def setup_method(self, method):
+        Utils.initialize_model_parallel()
+        # force_reset so a tracker left over from a previous test cannot leak in.
+        initialize_rng_tracker(force_reset=True)
+
+    def teardown_method(self, method):
+        Utils.destroy_model_parallel()
+
+    def test_auto_register(self):
+        """CheckpointWithoutOutput auto-registers to manager when ckpt_manager is provided."""
+        manager = CheckpointManager()
+
+        def func(x):
+            return x * 2 + 1
+
+        input_t = torch.randn(4, 4, device='cuda', requires_grad=True)
+
+        ckpt = CheckpointWithoutOutput(ckpt_manager=manager)
+        y = ckpt.checkpoint(func, input_t)
+
+        # Construction + checkpoint() should have appended ckpt to the manager.
+        assert len(manager.checkpoints) == 1
+        assert manager.checkpoints[0] is ckpt
+
+        ckpt2 = CheckpointWithoutOutput(ckpt_manager=manager)
+        y2 = ckpt2.checkpoint(torch.nn.functional.gelu, y)
+
+        assert len(manager.checkpoints) == 2
+        assert manager.checkpoints[1] is ckpt2
+
+        loss = y2.sum()
+        manager.discard_all_outputs_and_register_unified_recompute(loss)
+        loss.backward()
+
+        assert input_t.grad is not None
+
+    def test_discard_is_noop_with_manager(self):
+        """discard_output_and_register_recompute is a NO-OP when ckpt_manager is set."""
+        manager = CheckpointManager()
+
+        def func1(x):
+            return x * 2
+
+        def func2(x):
+            return torch.nn.functional.gelu(x)
+
+        input_ref = torch.randn(4, 4, device='cuda', requires_grad=True)
+        y1_ref = func1(input_ref)
+        y2_ref = func2(y1_ref)
+        loss_ref = y2_ref.sum()
+        loss_ref.backward()
+        grad_ref = input_ref.grad.clone()
+
+        input_ckpt = input_ref.detach().clone().requires_grad_(True)
+
+        ckpt1 = CheckpointWithoutOutput(ckpt_manager=manager)
+        y1 = ckpt1.checkpoint(func1, input_ckpt)
+        # With a manager attached this per-checkpoint discard must do nothing;
+        # the storage checks below verify that.
+        ckpt1.discard_output_and_register_recompute(y1)
+
+        ckpt2 = CheckpointWithoutOutput(ckpt_manager=manager)
+        y2 = ckpt2.checkpoint(func2, y1)
+        ckpt2.discard_output_and_register_recompute(y2)
+
+        assert y1.untyped_storage().size() > 0, "y1 should NOT be discarded yet"
+        assert y2.untyped_storage().size() > 0, "y2 should NOT be discarded yet"
+
+        loss_ckpt = y2.sum()
+        manager.discard_all_outputs_and_register_unified_recompute(loss_ckpt)
+
+        # Only the manager-level call releases the checkpointed storages.
+        assert y1.untyped_storage().size() == 0, "y1 should be discarded after manager call"
+        assert y2.untyped_storage().size() == 0, "y2 should be discarded after manager call"
+
+        loss_ckpt.backward()
+        grad_ckpt = input_ckpt.grad.clone()
+
+        assert torch.allclose(grad_ckpt, grad_ref, atol=1e-6)
+
+    def test_backward_compat_without_manager(self):
+        """CheckpointWithoutOutput without ckpt_manager should work exactly as before."""
+
+        def func(x):
+            return torch.nn.functional.gelu(x)
+
+        input_ref = torch.randn(4, 4, device='cuda', requires_grad=True)
+        y_ref = func(input_ref)
+        z_ref = y_ref * 2
+        loss_ref = z_ref.sum()
+        loss_ref.backward()
+        grad_ref = input_ref.grad.clone()
+
+        input_ckpt = input_ref.detach().clone().requires_grad_(True)
+
+        ckpt = CheckpointWithoutOutput()
+        y = ckpt.checkpoint(func, input_ckpt)
+        z = y * 2
+        ckpt.discard_output_and_register_recompute(z)
+
+        # Without a manager the standalone discard releases storage immediately.
+        assert y.untyped_storage().size() == 0
+
+        loss_ckpt = z.sum()
+        loss_ckpt.backward()
+        grad_ckpt = input_ckpt.grad.clone()
+
+        assert torch.allclose(grad_ckpt, grad_ref, atol=1e-6)
+
+    def test_error_handling(self):
+        """CheckpointManager rejects invalid add_checkpoint calls."""
+        manager = CheckpointManager()
+
+        # Wrong type entirely.
+        with pytest.raises(TypeError):
+            manager.add_checkpoint("not a checkpoint")
+
+        # NOTE(review): a bare CheckpointWithoutOutput (no checkpoint() run /
+        # no manager) is expected to be rejected with ValueError.
+        ckpt = CheckpointWithoutOutput()
+        with pytest.raises(ValueError):
+            manager.add_checkpoint(ckpt)
+
+
+class TestCheckpointManagerSequentialChain:
+    """Test CheckpointManager with sequential checkpoint chains."""
+
+    def setup_method(self, method):
+        Utils.initialize_model_parallel()
+        initialize_rng_tracker(force_reset=True)
+
+    def teardown_method(self, method):
+        Utils.destroy_model_parallel()
+
+    def test_basic_sequential_chain(self):
+        """Three sequential checkpoints: gradients match non-checkpointed version."""
+
+        def func1(x):
+            return x * 2 + 1
+
+        def func2(x):
+            return torch.nn.functional.gelu(x)
+
+        def func3(x):
+            return x * x + x
+
+        input_ref = torch.randn(4, 4, device='cuda', requires_grad=True)
+        input_ckpt = input_ref.detach().clone().requires_grad_(True)
+
+        # Plain (non-checkpointed) reference pass.
+        y1_ref = func1(input_ref)
+        y2_ref = func2(y1_ref)
+        y3_ref = func3(y2_ref)
+        loss_ref = y3_ref.sum()
+        loss_ref.backward()
+        grad_ref = input_ref.grad.clone()
+
+        manager = CheckpointManager()
+
+        y1 = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(func1, input_ckpt)
+        y2 = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(func2, y1)
+        y3 = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(func3, y2)
+
+        loss_ckpt = y3.sum()
+        manager.discard_all_outputs_and_register_unified_recompute(loss_ckpt)
+
+        # All checkpointed activations should be freed until backward recompute.
+        assert y1.untyped_storage().size() == 0, "y1 storage should be released"
+        assert y2.untyped_storage().size() == 0, "y2 storage should be released"
+        assert y3.untyped_storage().size() == 0, "y3 storage should be released"
+
+        loss_ckpt.backward()
+        grad_ckpt = input_ckpt.grad.clone()
+
+        assert torch.allclose(
+            grad_ckpt, grad_ref, atol=1e-6
+        ), f"Gradients mismatch!\nWith manager: {grad_ckpt}\nReference: {grad_ref}"
+
+    def test_sequential_chain_with_dropout(self):
+        """RNG state is restored during recompute so dropout gradients match."""
+
+        def func_with_dropout(x):
+            return torch.nn.functional.dropout(x, p=0.3, training=True)
+
+        def func2(x):
+            return torch.nn.functional.gelu(x)
+
+        input_ref = torch.randn(4, 4, device='cuda', requires_grad=True)
+        input_ckpt = input_ref.detach().clone().requires_grad_(True)
+
+        # Identical seeds before each pass so both draw the same dropout mask.
+        torch.manual_seed(42)
+        torch.cuda.manual_seed(42)
+
+        y1_ref = func_with_dropout(input_ref)
+        y2_ref = func2(y1_ref)
+        loss_ref = y2_ref.sum()
+        loss_ref.backward()
+        grad_ref = input_ref.grad.clone()
+
+        torch.manual_seed(42)
+        torch.cuda.manual_seed(42)
+
+        manager = CheckpointManager()
+
+        y1 = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(func_with_dropout, input_ckpt)
+        y2 = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(func2, y1)
+
+        loss_ckpt = y2.sum()
+        manager.discard_all_outputs_and_register_unified_recompute(loss_ckpt)
+
+        loss_ckpt.backward()
+        grad_ckpt = input_ckpt.grad.clone()
+
+        assert torch.allclose(
+            grad_ckpt, grad_ref, atol=1e-6
+        ), f"Gradients with dropout mismatch!\nWith manager: {grad_ckpt}\nReference: {grad_ref}"
+
+    def test_multiple_outputs(self):
+        """CheckpointManager handles functions that return multiple outputs."""
+
+        def func_multi_output(x):
+            return x * 2, x + 1
+
+        def func_combine(a, b):
+            return a + b
+
+        input_ref = torch.randn(4, 4, device='cuda', requires_grad=True)
+        input_ckpt = input_ref.detach().clone().requires_grad_(True)
+
+        y1a_ref, y1b_ref = func_multi_output(input_ref)
+        y2_ref = func_combine(y1a_ref, y1b_ref)
+        loss_ref = y2_ref.sum()
+        loss_ref.backward()
+        grad_ref = input_ref.grad.clone()
+
+        manager = CheckpointManager()
+
+        # checkpoint() forwards the tuple return of the wrapped function.
+        y1a, y1b = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(
+            func_multi_output, input_ckpt
+        )
+        y2 = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(func_combine, y1a, y1b)
+
+        loss_ckpt = y2.sum()
+        manager.discard_all_outputs_and_register_unified_recompute(loss_ckpt)
+
+        loss_ckpt.backward()
+        grad_ckpt = input_ckpt.grad.clone()
+
+        assert torch.allclose(grad_ckpt, grad_ref, atol=1e-6), (
+            f"Gradients mismatch with multiple outputs!\n"
+            f"With manager: {grad_ckpt}\nReference: {grad_ref}"
+        )
+
+
+class TestCheckpointManagerPartialCheckpoint:
+    """Test CheckpointManager with partial checkpointing (some ops not checkpointed)."""
+
+    def setup_method(self, method):
+        Utils.initialize_model_parallel()
+        initialize_rng_tracker(force_reset=True)
+
+    def teardown_method(self, method):
+        Utils.destroy_model_parallel()
+
+    def test_partial_checkpoint(self):
+        """
+        Only f and h are checkpointed; g is a regular operation.
+
+        Computation chain:
+            a --[f]--> b --[g]--> c --[h]--> d --[sum]--> loss
+        """
+
+        def func_f(x):
+            return torch.nn.functional.gelu(x * 2 + 1)
+
+        def func_g(x):
+            return x * 3 - 2
+
+        def func_h(x):
+            return torch.sigmoid(x) + x
+
+        input_ref = torch.randn(4, 4, device='cuda', requires_grad=True)
+
+        b_ref = func_f(input_ref)
+        c_ref = func_g(b_ref)
+        d_ref = func_h(c_ref)
+        loss_ref = d_ref.sum()
+        loss_ref.backward()
+        grad_ref = input_ref.grad.clone()
+
+        input_ckpt = input_ref.detach().clone().requires_grad_(True)
+
+        manager = CheckpointManager()
+
+        b = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(func_f, input_ckpt)
+        c = func_g(b)  # deliberately NOT checkpointed
+        d = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(func_h, c)
+
+        loss_ckpt = d.sum()
+        manager.discard_all_outputs_and_register_unified_recompute(loss_ckpt)
+
+        # Only checkpointed outputs are released; c stays resident.
+        assert b.untyped_storage().size() == 0, "b storage should be released"
+        assert d.untyped_storage().size() == 0, "d storage should be released"
+        assert c.untyped_storage().size() > 0, "c storage should NOT be released (not checkpointed)"
+
+        loss_ckpt.backward()
+        grad_ckpt = input_ckpt.grad.clone()
+
+        assert torch.allclose(grad_ckpt, grad_ref, atol=1e-6), (
+            f"Gradients mismatch with partial checkpoint!\n"
+            f"With manager: {grad_ckpt}\nReference: {grad_ref}"
+        )
+
+    def test_partial_checkpoint_with_tuple_output(self):
+        """
+        Mimics HyperConnection's computation pattern with tuple outputs.
+
+        - compute_mappings: checkpointed, returns tuple (h_pre, h_post, h_res)
+        - aggregate: NOT checkpointed
+        - apply_h_res: checkpointed
+        - apply_h_post: checkpointed
+        """
+
+        def compute_mappings(x):
+            h_pre = torch.sigmoid(x.mean(dim=-1, keepdim=True).expand_as(x))
+            h_post = torch.tanh(x.sum(dim=-1, keepdim=True).expand_as(x))
+            h_res = torch.relu(x)
+            return h_pre, h_post, h_res
+
+        def aggregate(x, h_pre):
+            return x * h_pre
+
+        def apply_h_res(h_res, residual):
+            return h_res + residual * 0.5
+
+        def apply_h_post(y, h_post):
+            return y * h_post + y
+
+        x_ref = torch.randn(4, 4, device='cuda', requires_grad=True)
+        residual_ref = torch.randn(4, 4, device='cuda', requires_grad=True)
+
+        # Non-checkpointed reference graph.
+        h_pre_ref, h_post_ref, h_res_ref = compute_mappings(x_ref)
+        agg_ref = aggregate(x_ref, h_pre_ref)
+        y_ref = torch.nn.functional.gelu(agg_ref)
+        mixed_ref = apply_h_res(h_res_ref, residual_ref)
+        output_ref = apply_h_post(y_ref, h_post_ref)
+        final_ref = output_ref + mixed_ref
+        loss_ref = final_ref.sum()
+        loss_ref.backward()
+        grad_x_ref = x_ref.grad.clone()
+        grad_residual_ref = residual_ref.grad.clone()
+
+        x_ckpt = x_ref.detach().clone().requires_grad_(True)
+        residual_ckpt = residual_ref.detach().clone().requires_grad_(True)
+
+        manager = CheckpointManager()
+
+        h_pre, h_post, h_res = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(
+            compute_mappings, x_ckpt
+        )
+        agg = aggregate(x_ckpt, h_pre)  # NOT checkpointed
+        y = torch.nn.functional.gelu(agg)  # NOT checkpointed
+        mixed = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(
+            apply_h_res, h_res, residual_ckpt
+        )
+        output = CheckpointWithoutOutput(ckpt_manager=manager).checkpoint(apply_h_post, y, h_post)
+
+        final = output + mixed
+        loss_ckpt = final.sum()
+
+        manager.discard_all_outputs_and_register_unified_recompute(loss_ckpt)
+
+        # Every checkpointed tuple element and output is released...
+        assert h_pre.untyped_storage().size() == 0, "h_pre storage should be released"
+        assert h_post.untyped_storage().size() == 0, "h_post storage should be released"
+        assert h_res.untyped_storage().size() == 0, "h_res storage should be released"
+        assert mixed.untyped_storage().size() == 0, "mixed storage should be released"
+        assert output.untyped_storage().size() == 0, "output storage should be released"
+
+        # ...while non-checkpointed intermediates keep their storage.
+        assert agg.untyped_storage().size() > 0, "agg storage should NOT be released"
+        assert y.untyped_storage().size() > 0, "y storage should NOT be released"
+
+        loss_ckpt.backward()
+        grad_x_ckpt = x_ckpt.grad.clone()
+        grad_residual_ckpt = residual_ckpt.grad.clone()
+
+        assert torch.allclose(
+            grad_x_ckpt, grad_x_ref, atol=1e-6
+        ), f"Gradients for x mismatch!\nWith manager: {grad_x_ckpt}\nReference: {grad_x_ref}"
+        assert torch.allclose(grad_residual_ckpt, grad_residual_ref, atol=1e-6), (
+            f"Gradients for residual mismatch!\n"
+            f"With manager: {grad_residual_ckpt}\nReference: {grad_residual_ref}"
+        )
diff --git a/tests/unit_tests/transformer/test_multi_token_prediction.py b/tests/unit_tests/transformer/test_multi_token_prediction.py
index 57423da335b..0a3026918cc 100644
--- a/tests/unit_tests/transformer/test_multi_token_prediction.py
+++ b/tests/unit_tests/transformer/test_multi_token_prediction.py
@@ -105,7 +105,7 @@ def test_constructor_local(self, tp):
assert num_weights == 15216 * config.mtp_num_layers
@pytest.mark.skipif(not HAVE_TE, reason="transformer_engine not available")
- @pytest.mark.parametrize(('tp', 'cp'), [(1, 1), (1, 2), (2, 1), (2, 2)])
+ @pytest.mark.parametrize(('tp', 'cp'), [(1, 1), (2, 1), (2, 2)])
def test_constructor_ues_te(self, tp, cp):
"""Test basic construction of MTP module."""
torch.manual_seed(_SEED)
@@ -312,7 +312,7 @@ def get_packed_batch(self, seq_lengths, micro_batch_size):
not HAVE_TE or not is_te_min_version("2.1.0"),
reason="grouped_gemm requires TransformerEngine >= 2.1.0",
)
- @pytest.mark.parametrize(("tp", "cp"), [(1, 1), (1, 2), (2, 1), (2, 2)])
+ @pytest.mark.parametrize(("tp", "cp"), [(2, 1), (2, 2)])
def test_sharded_state_dict(self, tp, cp):
"""Test MTP with different tensor parallel sizes."""
args = self.create_test_args(tp, cp, self.seq_length, self.micro_batch_size)
@@ -331,9 +331,8 @@ def test_sharded_state_dict(self, tp, cp):
not HAVE_TE or not is_te_min_version("2.1.0"),
reason="grouped_gemm requires TransformerEngine >= 2.1.0",
)
- @pytest.mark.parametrize("full_recompute", [False, True])
@pytest.mark.parametrize(
- ("tp", "cp"), [(1, 1), (1, 2), (1, 4), (2, 1), (2, 2), (2, 4), (4, 1), (4, 2)]
+ ("tp", "cp", "full_recompute"), [(1, 1, False), (1, 4, False), (2, 4, False), (4, 1, True)]
)
def test_forward_backward(self, tmp_path_dist_ckpt, tp, cp, full_recompute):
"""Test MTP forward and backward with gptmodel."""
diff --git a/tests/unit_tests/transformer/test_submodule_callables.py b/tests/unit_tests/transformer/test_submodule_callables.py
index 03e2d751a52..7b41b3ca197 100644
--- a/tests/unit_tests/transformer/test_submodule_callables.py
+++ b/tests/unit_tests/transformer/test_submodule_callables.py
@@ -138,7 +138,6 @@ def test_1f1b_overlap(self, dispatcher_type, grouped_gemm, permute_fusion):
}
if dispatcher_type == "flex":
extra_kwargs["moe_flex_dispatcher_backend"] = "deepep"
- extra_kwargs["moe_router_dtype"] = "fp32"
config = get_test_config(extra_kwargs=extra_kwargs, moe_grouped_gemm=grouped_gemm)
microbatches = 4
with deterministic_mode():
diff --git a/tests/unit_tests/transformer/test_thd_correctness.py b/tests/unit_tests/transformer/test_thd_correctness.py
index ccf70b8a885..533f64081f4 100644
--- a/tests/unit_tests/transformer/test_thd_correctness.py
+++ b/tests/unit_tests/transformer/test_thd_correctness.py
@@ -30,6 +30,7 @@
import torch.nn as nn
from megatron.core import parallel_state
+from megatron.core.models.common.embeddings.rotary_pos_embedding import RotaryEmbedding
from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_with_transformer_engine_spec
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed
@@ -149,15 +150,29 @@ def compute_sbhd_padded_max_len(
def compute_thd_padded_seqlens(
- seqlens: List[int], cp_size: int, tp_size: int, sp_enabled: bool, pad_to_max: bool = False
+ seqlens: List[int],
+ cp_size: int,
+ tp_size: int,
+ sp_enabled: bool,
+ pad_to_max: bool = False,
+ dynamic_cp: bool = False,
) -> List[int]:
"""Padded per-sequence lengths for THD.
When pad_to_max=True, each sequence is padded to max(seqlens) so that
total THD tokens = max_len * B, matching SBHD. This ensures TE GEMM
kernels see identical M dimensions for bitwise comparison.
+
+ When dynamic_cp=True, pad to the global upper-bound CP size so that the
+ same packed layout works regardless of which dynamic CP sub-group the
+ sequence lands in.
"""
- cp_divisor = 2 * cp_size if cp_size > 1 else 1
+ if dynamic_cp:
+ MAX_CP_SIZE = 8
+ effective_cp = max(cp_size, MAX_CP_SIZE)
+ else:
+ effective_cp = cp_size
+ cp_divisor = 2 * effective_cp if cp_size > 1 else 1
if pad_to_max:
max_len = _round_up(max(seqlens), cp_divisor)
padded = [max_len] * len(seqlens)
@@ -181,6 +196,7 @@ def make_packed_seq_params(
tp_size: int = 1,
sp_enabled: bool = False,
pad_to_max: bool = False,
+ dynamic_cp: bool = False,
) -> PackedSeqParams:
"""Create PackedSeqParams with cu_seqlens and cu_seqlens_padded."""
@@ -190,7 +206,9 @@ def to_cu_seqlens(lens):
cu[i + 1] = cu[i] + l
return cu.cuda()
- padded = compute_thd_padded_seqlens(seqlens, cp_size, tp_size, sp_enabled, pad_to_max)
+ padded = compute_thd_padded_seqlens(
+ seqlens, cp_size, tp_size, sp_enabled, pad_to_max, dynamic_cp=dynamic_cp
+ )
return PackedSeqParams(
cu_seqlens_q=to_cu_seqlens(seqlens),
cu_seqlens_kv=to_cu_seqlens(seqlens),
@@ -362,10 +380,21 @@ def shard_sbhd(tensor, cp_rank, cp_size, tp_rank, tp_size, sp_enabled):
def shard_thd(
- seq_data_list, seqlens, cp_rank, cp_size, tp_rank, tp_size, sp_enabled, H, pad_to_max=False
+ seq_data_list,
+ seqlens,
+ cp_rank,
+ cp_size,
+ tp_rank,
+ tp_size,
+ sp_enabled,
+ H,
+ pad_to_max=False,
+ dynamic_cp=False,
):
"""Shard per-sequence data into local THD [local_T, 1, H]."""
- padded = compute_thd_padded_seqlens(seqlens, cp_size, tp_size, sp_enabled, pad_to_max)
+ padded = compute_thd_padded_seqlens(
+ seqlens, cp_size, tp_size, sp_enabled, pad_to_max, dynamic_cp=dynamic_cp
+ )
chunks = []
for data, sl, psl in zip(seq_data_list, seqlens, padded):
@@ -446,7 +475,7 @@ class _GatherTHD(torch.autograd.Function):
"""Gather THD outputs from all ranks with gradient support."""
@staticmethod
- def forward(ctx, local, seqlens, cp_size, tp_size, sp_enabled, H, pad_to_max):
+ def forward(ctx, local, seqlens, cp_size, tp_size, sp_enabled, H, pad_to_max, dynamic_cp):
ctx.seqlens, ctx.cp_size, ctx.tp_size, ctx.sp_enabled, ctx.H = (
seqlens,
cp_size,
@@ -456,7 +485,9 @@ def forward(ctx, local, seqlens, cp_size, tp_size, sp_enabled, H, pad_to_max):
)
ctx.cp_rank = parallel_state.get_context_parallel_rank() if cp_size > 1 else 0
ctx.tp_rank = parallel_state.get_tensor_model_parallel_rank()
- ctx.padded = compute_thd_padded_seqlens(seqlens, cp_size, tp_size, sp_enabled, pad_to_max)
+ ctx.padded = compute_thd_padded_seqlens(
+ seqlens, cp_size, tp_size, sp_enabled, pad_to_max, dynamic_cp=dynamic_cp
+ )
out = local
if sp_enabled:
@@ -495,7 +526,7 @@ def backward(ctx, grad):
if ctx.sp_enabled:
seg = packed.shape[0] // ctx.tp_size
packed = packed[ctx.tp_rank * seg : (ctx.tp_rank + 1) * seg]
- return packed.unsqueeze(1).contiguous(), None, None, None, None, None, None
+ return packed.unsqueeze(1).contiguous(), None, None, None, None, None, None, None
def gather_sbhd(local, cp_size, tp_size, sp_enabled):
@@ -504,8 +535,8 @@ def gather_sbhd(local, cp_size, tp_size, sp_enabled):
return _GatherSBHD.apply(local, cp_size, tp_size, sp_enabled)
-def gather_thd(local, seqlens, cp_size, tp_size, sp_enabled, H, pad_to_max=False):
- return _GatherTHD.apply(local, seqlens, cp_size, tp_size, sp_enabled, H, pad_to_max)
+def gather_thd(local, seqlens, cp_size, tp_size, sp_enabled, H, pad_to_max=False, dynamic_cp=False):
+ return _GatherTHD.apply(local, seqlens, cp_size, tp_size, sp_enabled, H, pad_to_max, dynamic_cp)
# =============================================================================
@@ -647,3 +678,386 @@ def test_thd_format(tc: TestCase):
if tc.forward_bitwise or tc.backward_bitwise:
torch.use_deterministic_algorithms(False)
os.environ.pop("NVTE_ALLOW_NONDETERMINISTIC_ALGO", None)
+
+
+# =============================================================================
+# Dynamic CP Test Infrastructure
+# =============================================================================
+
+
+@dataclass
+class DynamicCPAssignment:
+ """Per-rank assignment in the dynamic CP configuration.
+
+ local_cp_size: number of ranks in this rank's CP communicator.
+ seq_indices: indices into the test case's seqlens list that this rank processes.
+
+ Ranks sharing the same CP sub-group have identical DynamicCPAssignment values.
+ """
+
+ local_cp_size: int
+ seq_indices: List[int]
+
+
+@dataclass
+class DynamicCPTestCase:
+ """Test case for dynamic CP correctness.
+
+ Compares fixed CP (baseline) against dynamic CP where sub-groups of ranks
+ can process different sequences with different CP sizes.
+
+    dcp_assignments: one entry per DP×CP rank (len == dp_cp_world_size).
+ Ranks in the same sub-group share the same local_cp_size and seq_indices.
+ """
+
+ name: str
+ hidden_size: int
+ num_heads: int
+ num_kv_heads: int
+ ffn_hidden_size: int
+ seqlens: List[int]
+ tp_size: int
+ cp_size: int
+ sp_enabled: bool
+ dcp_assignments: List[DynamicCPAssignment]
+
+
+# Dynamic CP Test Cases
+# ---------------------
+# Each test runs two paths through the *same* TransformerLayer and compares
+# forward outputs + backward gradients (similarity check with TE attention).
+#
+# Parameters:
+#   cp_size — the CP size used for the *baseline* (fixed CP) path. It also
+# determines dp_size = world_size // (tp_size * cp_size), which controls how
+# sequences are split across DP ranks in the baseline. The dynamic CP path
+# ignores this cp_size and instead uses the local_cp_size from each
+# DynamicCPAssignment.
+#
+# Baseline (fixed CP):
+# Sequences are evenly split across DP ranks (seqs_per_dp = len(seqlens) //
+# dp_size). Each DP rank runs standard CP (cp_size) on its subset:
+# pad β zigzag shard β forward β gather β backward.
+#
+# Dynamic CP:
+#   dcp_assignments has one entry per DP×CP rank. Ranks sharing a CP sub-group
+# have identical (local_cp_size, seq_indices). Each sub-group forms its own
+# CP communicator and independently shards / gathers only the sequences
+# assigned to it.
+#
+# Sequence lengths are intentionally non-powers-of-two (mostly primes) so
+# that padding to cp_divisor is always exercised.
+#
+# fmt: off
+_A = DynamicCPAssignment
+DYNAMIC_CP_TEST_CASES = [
+ # -------------------------------------------------------------------------
+ # Uniform: all dp_cp ranks share all seqs with larger local_cp_size.
+    # All 4 ranks form one sub-group → equivalent to fixed CP but via the
+ # dynamic CP code path.
+ # -------------------------------------------------------------------------
+    # tp=2, cp=2, world_size=8 → dp_cp_size=4, all ranks get same assignment
+ DynamicCPTestCase(
+ "dcp_uniform_tp2_cp2_sp",
+ 4096, 32, 8, 14336,
+ [3947, 1999, 1037, 4091, 2111, 503],
+ tp_size=2, cp_size=2, sp_enabled=True,
+ dcp_assignments=[
+ _A(4, [0, 1, 2, 3, 4, 5]), # dp_cp_rank 0
+ _A(4, [0, 1, 2, 3, 4, 5]), # dp_cp_rank 1
+ _A(4, [0, 1, 2, 3, 4, 5]), # dp_cp_rank 2
+ _A(4, [0, 1, 2, 3, 4, 5]), # dp_cp_rank 3
+ ],
+ ),
+    # tp=1, cp=2, world_size=8 → dp_cp_size=8, all ranks get same assignment
+ DynamicCPTestCase(
+ "dcp_uniform_tp1_cp2",
+ 1024, 16, 4, 4096,
+ [4001, 2039, 997, 511, 3967, 2053, 1009, 499],
+ tp_size=1, cp_size=2, sp_enabled=False,
+ dcp_assignments=[
+ _A(8, [0, 1, 2, 3, 4, 5, 6, 7]), # dp_cp_rank 0
+ _A(8, [0, 1, 2, 3, 4, 5, 6, 7]), # dp_cp_rank 1
+ _A(8, [0, 1, 2, 3, 4, 5, 6, 7]), # dp_cp_rank 2
+ _A(8, [0, 1, 2, 3, 4, 5, 6, 7]), # dp_cp_rank 3
+ _A(8, [0, 1, 2, 3, 4, 5, 6, 7]), # dp_cp_rank 4
+ _A(8, [0, 1, 2, 3, 4, 5, 6, 7]), # dp_cp_rank 5
+ _A(8, [0, 1, 2, 3, 4, 5, 6, 7]), # dp_cp_rank 6
+ _A(8, [0, 1, 2, 3, 4, 5, 6, 7]), # dp_cp_rank 7
+ ],
+ ),
+ # -------------------------------------------------------------------------
+ # Heterogeneous: sub-groups with different local_cp_size.
+ # Ranks are split into multiple CP sub-groups; some ranks process
+ # sequences alone (local_cp_size=1) while others cooperate (local_cp_size=2+).
+ # -------------------------------------------------------------------------
+    # tp=2, cp=4, world_size=8 → dp_cp_size=4
+ # rank 0: alone (cp=1), rank 1: alone (cp=1), ranks 2-3: pair (cp=2)
+ DynamicCPTestCase(
+ "dcp_hetero_tp2_cp4_sp",
+ 4096, 32, 8, 14336,
+ [4093, 2017, 3989, 2111, 1013, 509],
+ tp_size=2, cp_size=4, sp_enabled=True,
+ dcp_assignments=[
+ _A(1, [0]), # dp_cp_rank 0: solo
+ _A(1, [1]), # dp_cp_rank 1: solo
+ _A(2, [2, 3, 4, 5]), # dp_cp_rank 2: pair with rank 3
+ _A(2, [2, 3, 4, 5]), # dp_cp_rank 3: pair with rank 2
+ ],
+ ),
+    # tp=1, cp=4, world_size=8 → dp_cp_size=8
+ # ranks 0,1: solo; ranks 2-3: pair; ranks 4,5: solo; ranks 6-7: pair
+ DynamicCPTestCase(
+ "dcp_hetero_tp1_cp4",
+ 1024, 16, 4, 4096,
+ [4007, 2003, 3989, 2053, 4091, 2017, 1013, 503],
+ tp_size=1, cp_size=4, sp_enabled=False,
+ dcp_assignments=[
+ _A(1, [0]), # dp_cp_rank 0: solo
+ _A(1, [1]), # dp_cp_rank 1: solo
+ _A(2, [2, 3]), # dp_cp_rank 2: pair with rank 3
+ _A(2, [2, 3]), # dp_cp_rank 3: pair with rank 2
+ _A(1, [4]), # dp_cp_rank 4: solo
+ _A(1, [5]), # dp_cp_rank 5: solo
+ _A(2, [6, 7]), # dp_cp_rank 6: pair with rank 7
+ _A(2, [6, 7]), # dp_cp_rank 7: pair with rank 6
+ ],
+ ),
+ # -------------------------------------------------------------------------
+ # Mixed: cp4 + cp2 + cp1 + cp1, baseline fixed cp=2.
+    # tp=1, cp=2, world_size=8 → dp_cp_size=8, dp_size=4
+ # ranks 0-3: quad (cp=4), ranks 4-5: pair (cp=2), rank 6: solo, rank 7: solo
+ # -------------------------------------------------------------------------
+ DynamicCPTestCase(
+ "dcp_mixed_tp1_cp2",
+ 1024, 16, 4, 4096,
+ [4007, 2003, 3989, 2053, 4091, 2017, 1013, 503],
+ tp_size=1, cp_size=2, sp_enabled=False,
+ dcp_assignments=[
+ _A(4, [0, 1, 2, 3]), # dp_cp_rank 0: quad with ranks 1,2,3
+ _A(4, [0, 1, 2, 3]), # dp_cp_rank 1: quad with ranks 0,2,3
+ _A(4, [0, 1, 2, 3]), # dp_cp_rank 2: quad with ranks 0,1,3
+ _A(4, [0, 1, 2, 3]), # dp_cp_rank 3: quad with ranks 0,1,2
+ _A(2, [4, 5]), # dp_cp_rank 4: pair with rank 5
+ _A(2, [4, 5]), # dp_cp_rank 5: pair with rank 4
+ _A(1, [6]), # dp_cp_rank 6: solo
+ _A(1, [7]), # dp_cp_rank 7: solo
+ ],
+ ),
+]
+# fmt: on
+
+
+# =============================================================================
+# Dynamic CP Gather (with explicit cp_group)
+# =============================================================================
+
+
+class _GatherTHDDynamic(torch.autograd.Function):
+ """Gather THD outputs from an explicit CP group with gradient support."""
+
+ @staticmethod
+ def forward(ctx, local, seqlens, cp_size, tp_size, sp_enabled, H, cp_group, cp_rank):
+ ctx.seqlens, ctx.cp_size, ctx.tp_size, ctx.sp_enabled, ctx.H = (
+ seqlens,
+ cp_size,
+ tp_size,
+ sp_enabled,
+ H,
+ )
+ ctx.cp_rank = cp_rank
+ ctx.tp_rank = parallel_state.get_tensor_model_parallel_rank()
+ ctx.padded = compute_thd_padded_seqlens(
+ seqlens, cp_size, tp_size, sp_enabled, False, dynamic_cp=True
+ )
+
+ out = local
+ if sp_enabled:
+ gathered = [torch.empty_like(out) for _ in range(tp_size)]
+ dist.all_gather(
+ gathered, out.contiguous(), group=parallel_state.get_tensor_model_parallel_group()
+ )
+ out = torch.cat(gathered, dim=0)
+
+ if cp_size > 1:
+ local_lens = [p // cp_size for p in ctx.padded]
+ offset, seqs = 0, []
+ for i, ll in enumerate(local_lens):
+ chunk = out[offset : offset + ll]
+ gathered = [torch.empty_like(chunk) for _ in range(cp_size)]
+ dist.all_gather(gathered, chunk.contiguous(), group=cp_group)
+ seqs.append(_zigzag_merge(gathered, cp_size)[: seqlens[i]])
+ offset += ll
+ out = torch.cat(seqs, dim=0)
+ else:
+ out = _strip_thd_padding(out, seqlens, ctx.padded)
+ return out
+
+ @staticmethod
+ def backward(ctx, grad):
+ offset, chunks = 0, []
+ for sl, psl in zip(ctx.seqlens, ctx.padded):
+ g = grad[offset : offset + sl, 0, :]
+ if psl > sl:
+ g = torch.cat([g, torch.zeros(psl - sl, ctx.H, dtype=g.dtype, device=g.device)])
+ chunks.append(_zigzag_split(g, ctx.cp_rank, ctx.cp_size))
+ offset += sl
+
+ packed = torch.cat(chunks, dim=0)
+ if ctx.sp_enabled:
+ seg = packed.shape[0] // ctx.tp_size
+ packed = packed[ctx.tp_rank * seg : (ctx.tp_rank + 1) * seg]
+ return packed.unsqueeze(1).contiguous(), None, None, None, None, None, None, None
+
+
+def gather_thd_dynamic(local, seqlens, cp_size, tp_size, sp_enabled, H, cp_group, cp_rank):
+ return _GatherTHDDynamic.apply(
+ local, seqlens, cp_size, tp_size, sp_enabled, H, cp_group, cp_rank
+ )
+
+
+# =============================================================================
+# Dynamic CP Test Function
+# =============================================================================
+
+
+@pytest.mark.parametrize("tc", DYNAMIC_CP_TEST_CASES, ids=lambda tc: tc.name)
+def test_dynamic_cp_format(tc: DynamicCPTestCase):
+ """Compare fixed CP THD vs dynamic CP THD format outputs and gradients."""
+ H, seqlens = tc.hidden_size, tc.seqlens
+ tp_size, cp_size, sp = tc.tp_size, tc.cp_size, tc.sp_enabled
+
+ Utils.initialize_model_parallel(
+ tensor_model_parallel_size=tp_size,
+ context_parallel_size=cp_size,
+ dynamic_context_parallel=True,
+ )
+ model_parallel_cuda_manual_seed(42)
+
+ layer = build_gpt_layer(
+ H,
+ tc.num_heads,
+ tc.num_kv_heads,
+ tc.ffn_hidden_size,
+ tp_size,
+ cp_size,
+ sp,
+ use_mock_attention=False,
+ deterministic=False,
+ )
+ kv_channels = H // tc.num_heads
+ rope = RotaryEmbedding(kv_channels=kv_channels, rotary_percent=1.0).cuda()
+
+ cp_rank = parallel_state.get_context_parallel_rank()
+ tp_rank = parallel_state.get_tensor_model_parallel_rank()
+ dp_rank = parallel_state.get_data_parallel_rank()
+ dp_size = parallel_state.get_data_parallel_world_size()
+
+ # All ranks generate identical full data (same seed, no dp_rank offset)
+ torch.manual_seed(42)
+ all_seq_data = [torch.randn(sl, H, dtype=torch.bfloat16).cuda() for sl in seqlens]
+ torch.manual_seed(142)
+ all_grad_data = [torch.randn(sl, H, dtype=torch.bfloat16).cuda() for sl in seqlens]
+
+ # === Baseline: fixed CP, THD format ===
+ assert (
+ len(seqlens) % dp_size == 0
+ ), f"Need len(seqlens)={len(seqlens)} divisible by dp_size={dp_size}"
+ seqs_per_dp = len(seqlens) // dp_size
+ base_indices = list(range(dp_rank * seqs_per_dp, (dp_rank + 1) * seqs_per_dp))
+ base_seqlens = [seqlens[i] for i in base_indices]
+ base_seq_data = [all_seq_data[i] for i in base_indices]
+ base_grad_data = [all_grad_data[i] for i in base_indices]
+
+ local_thd_base = shard_thd(
+ base_seq_data, base_seqlens, cp_rank, cp_size, tp_rank, tp_size, sp, H, dynamic_cp=True
+ )
+ packed_base = make_packed_seq_params(base_seqlens, cp_size, tp_size, sp, dynamic_cp=True)
+ rotary_pos_emb_base = rope(packed_base.max_seqlen_q, packed_seq=True)
+ input_base = local_thd_base.detach().clone().requires_grad_(True)
+ out_base, _ = layer(
+ hidden_states=input_base, packed_seq_params=packed_base, rotary_pos_emb=rotary_pos_emb_base
+ )
+ gathered_base = gather_thd(out_base, base_seqlens, cp_size, tp_size, sp, H, dynamic_cp=True)
+ grad_base = torch.cat(base_grad_data, dim=0).unsqueeze(1)
+ gathered_base.backward(grad_base)
+ baseline_grads = {n: p.grad.clone() for n, p in layer.named_parameters()}
+ layer.zero_grad()
+
+ # === Dynamic CP ===
+ dp_cp_group = parallel_state.get_data_parallel_group(with_context_parallel=True)
+ dp_cp_rank = dist.get_rank(group=dp_cp_group)
+
+ assert dp_cp_rank < len(
+ tc.dcp_assignments
+ ), f"dp_cp_rank={dp_cp_rank} out of range (len={len(tc.dcp_assignments)})"
+ my_assignment = tc.dcp_assignments[dp_cp_rank]
+ local_cp_size = my_assignment.local_cp_size
+ dcp_indices = my_assignment.seq_indices
+ dcp_seqlens = [seqlens[i] for i in dcp_indices]
+ dcp_seq_data = [all_seq_data[i] for i in dcp_indices]
+ dcp_grad_data = [all_grad_data[i] for i in dcp_indices]
+
+ dcp_cp_group = parallel_state.get_dynamic_data_context_parallel_groups(group_size=local_cp_size)
+ dcp_cp_rank = dist.get_rank(group=dcp_cp_group)
+
+ local_thd_dcp = shard_thd(
+ dcp_seq_data,
+ dcp_seqlens,
+ dcp_cp_rank,
+ local_cp_size,
+ tp_rank,
+ tp_size,
+ sp,
+ H,
+ dynamic_cp=True,
+ )
+ packed_dcp = make_packed_seq_params(dcp_seqlens, local_cp_size, tp_size, sp, dynamic_cp=True)
+ packed_dcp.local_cp_size = local_cp_size
+ packed_dcp.cp_group = dcp_cp_group
+ rotary_pos_emb_dcp = rope(packed_dcp.max_seqlen_q, packed_seq=True)
+
+ input_dcp = local_thd_dcp.detach().clone().requires_grad_(True)
+ out_dcp, _ = layer(
+ hidden_states=input_dcp, packed_seq_params=packed_dcp, rotary_pos_emb=rotary_pos_emb_dcp
+ )
+ gathered_dcp = gather_thd_dynamic(
+ out_dcp, dcp_seqlens, local_cp_size, tp_size, sp, H, dcp_cp_group, dcp_cp_rank
+ )
+ grad_dcp = torch.cat(dcp_grad_data, dim=0).unsqueeze(1)
+ gathered_dcp.backward(grad_dcp)
+ dcp_grads = {n: p.grad.clone() for n, p in layer.named_parameters()}
+
+    # === Gradient sync: reduce across all DP×CP ranks ===
+ for n in baseline_grads:
+ dist.all_reduce(baseline_grads[n], group=dp_cp_group)
+ dist.all_reduce(dcp_grads[n], group=dp_cp_group)
+ if sp:
+ tp_group = parallel_state.get_tensor_model_parallel_group()
+ for n, p in layer.named_parameters():
+ if getattr(p, "sequence_parallel", False):
+ dist.all_reduce(baseline_grads[n], group=tp_group)
+ dist.all_reduce(dcp_grads[n], group=tp_group)
+
+ # === Forward comparison (per-sequence, on ranks that have both) ===
+ common_indices = sorted(set(base_indices) & set(dcp_indices))
+ for seq_idx in common_indices:
+ sl = seqlens[seq_idx]
+ base_pos = base_indices.index(seq_idx)
+ base_offset = sum(base_seqlens[:base_pos])
+ dcp_pos = dcp_indices.index(seq_idx)
+ dcp_offset = sum(dcp_seqlens[:dcp_pos])
+ assert_close(
+ f"seq[{seq_idx}] output",
+ gathered_base[base_offset : base_offset + sl, 0].detach(),
+ gathered_dcp[dcp_offset : dcp_offset + sl, 0].detach(),
+ False,
+ )
+
+ # === Backward comparison ===
+ for n in baseline_grads:
+ if n in dcp_grads:
+ assert_close(f"grad[{n}]", baseline_grads[n], dcp_grads[n], False)
+
+ # === Cleanup ===
+ Utils.destroy_model_parallel()
diff --git a/tests/unit_tests/transformer/test_transformer_layer.py b/tests/unit_tests/transformer/test_transformer_layer.py
index da1f9ce5860..c80b8f14480 100644
--- a/tests/unit_tests/transformer/test_transformer_layer.py
+++ b/tests/unit_tests/transformer/test_transformer_layer.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
import pytest
@@ -8,17 +8,41 @@
from megatron.core.dist_checkpointing.mapping import ShardedObject, ShardedTensor
from megatron.core.inference.contexts import StaticInferenceContext
from megatron.core.models.gpt.gpt_layer_specs import (
+ get_gpt_layer_with_transformer_engine_spec,
get_gpt_layer_with_transformer_engine_submodules,
)
-from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed
+from megatron.core.tensor_parallel.random import CheckpointManager, model_parallel_cuda_manual_seed
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.transformer_layer import (
+ HyperConnectionTransformerLayer,
TransformerLayer,
get_transformer_layer_offset,
)
from tests.unit_tests.test_utilities import Utils
+def _make_mhc_config(hidden_size=64, num_streams=4, **extra):
+ """Build a TransformerConfig with common MHC defaults.
+
+ Any default can be overridden via **extra
+ (e.g. ``_make_mhc_config(num_layers=8, recompute_modules=["core_attn", "mhc"])``).
+ """
+ base = dict(
+ num_layers=2,
+ hidden_size=hidden_size,
+ num_attention_heads=4,
+ use_cpu_initialization=True,
+ enable_hyper_connections=True,
+ num_residual_streams=num_streams,
+ mhc_sinkhorn_iterations=5,
+ mhc_init_gating_factor=0.01,
+ hidden_dropout=0.0,
+ attention_dropout=0.0,
+ )
+ base.update(extra)
+ return TransformerConfig(**base)
+
+
class TestParallelTransformerLayer:
def setup_method(self, method):
@@ -313,3 +337,765 @@ def get_tensor_shapes_for_tp(transformer_config, tp_size):
'self_attention.linear_qkv.weight': (hs * 3 // tp_size, hs),
'self_attention.linear_qkv.bias': (hs * 3 // tp_size,),
}
+
+
+class TestTransformerLayerWithHyperConnectionRecompute:
+ """Test TransformerLayer with HyperConnection and MHC block recomputation."""
+
+ def setup_method(self, method):
+ Utils.initialize_model_parallel(1, 1)
+ model_parallel_cuda_manual_seed(123)
+
+ def teardown_method(self, method):
+ Utils.destroy_model_parallel()
+
+ def _create_layer_with_hyper_connection(
+ self, hidden_size=64, num_streams=4, layer_number=1, **extra
+ ):
+ """Create a HyperConnectionTransformerLayer with hyper connection enabled."""
+ config = _make_mhc_config(
+ hidden_size=hidden_size,
+ num_streams=num_streams,
+ recompute_modules=["core_attn", "mhc"],
+ recompute_granularity='selective',
+ **extra,
+ )
+ layer_spec = get_gpt_layer_with_transformer_engine_spec(enable_hyper_connection=True)
+ layer = HyperConnectionTransformerLayer(
+ config, layer_spec.submodules, layer_number=layer_number
+ )
+ layer.cuda()
+ return layer, config
+
+ def test_forward_with_hyper_connection_recompute(self):
+ """
+ Test that TransformerLayer forward works correctly with HyperConnection
+ and MHC block recomputation enabled.
+ """
+ hidden_size = 64
+ num_streams = 4
+ seq_len = 8
+ batch_size = 2
+
+ layer, config = self._create_layer_with_hyper_connection(hidden_size, num_streams)
+ layer.train() # Enable training mode for recomputation
+
+ # Input shape: [seq_len, batch_size, n * hidden_size] for hyper connections
+ n_channels = num_streams * hidden_size
+ hidden_states = torch.randn(
+ seq_len, batch_size, n_channels, device='cuda', requires_grad=True
+ )
+ attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda')
+
+ # Create manager for MHC block recomputation
+ manager = CheckpointManager()
+
+ # Forward pass with recompute manager
+ manager.is_last_layer_in_recompute_block = True
+ output, context = layer(
+ hidden_states=hidden_states,
+ attention_mask=attention_mask,
+ mhc_recompute_manager=manager,
+ )
+
+ # Verify output shape
+ assert output.shape == (
+ seq_len,
+ batch_size,
+ n_channels,
+ ), f"Expected output shape {(seq_len, batch_size, n_channels)}, got {output.shape}"
+
+ # Register unified recompute hook at block boundary.
+ manager.discard_all_outputs_and_register_unified_recompute(output)
+
+ # Backward pass should work without error
+ loss = output.sum()
+ loss.backward()
+
+ # Verify gradients exist
+ assert hidden_states.grad is not None, "Gradients should be computed for hidden_states"
+ assert hidden_states.grad.shape == hidden_states.shape
+
+ def test_intermediate_layer_with_recompute(self):
+ """
+ Test TransformerLayer as an intermediate layer (not last in block).
+ In this case, MLP BDA should also be checkpointed.
+ """
+ hidden_size = 64
+ num_streams = 4
+ seq_len = 8
+ batch_size = 2
+
+ layer, config = self._create_layer_with_hyper_connection(hidden_size, num_streams)
+ layer.train()
+
+ n_channels = num_streams * hidden_size
+ hidden_states = torch.randn(
+ seq_len, batch_size, n_channels, device='cuda', requires_grad=True
+ )
+ attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda')
+
+ manager = CheckpointManager()
+
+ # Forward pass - NOT the last layer in block
+ manager.is_last_layer_in_recompute_block = False
+ output, context = layer(
+ hidden_states=hidden_states,
+ attention_mask=attention_mask,
+ mhc_recompute_manager=manager,
+ )
+
+ # Verify output shape
+ assert output.shape == (seq_len, batch_size, n_channels)
+
+ # Backward pass should work
+ loss = output.sum()
+ # For intermediate layers, we need to pass output to next layer
+ # Here we just register the recompute hook on output for testing
+ manager.discard_all_outputs_and_register_unified_recompute(loss)
+
+ loss.backward()
+
+ assert hidden_states.grad is not None
+ assert hidden_states.grad.shape == hidden_states.shape
+
+ def test_multiple_layers_chain_with_recompute(self):
+ """
+ Test multiple TransformerLayers chained together with a single
+ CheckpointManager, simulating TransformerBlock behavior.
+ """
+ hidden_size = 64
+ num_streams = 4
+ seq_len = 8
+ batch_size = 2
+ num_layers = 3
+
+ layers = [
+ self._create_layer_with_hyper_connection(
+ hidden_size, num_streams, layer_number=i + 1, num_layers=num_layers
+ )[0]
+ for i in range(num_layers)
+ ]
+
+ for layer in layers:
+ layer.train()
+
+ n_channels = num_streams * hidden_size
+ hidden_states = torch.randn(
+ seq_len, batch_size, n_channels, device='cuda', requires_grad=True
+ )
+ attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda')
+
+ # Single manager for all layers (like TransformerBlock)
+ manager = CheckpointManager()
+
+ # Forward through all layers
+ h = hidden_states
+ for i, layer in enumerate(layers):
+ is_last = i == num_layers - 1
+ manager.is_last_layer_in_recompute_block = is_last
+ h, _ = layer(
+ hidden_states=h, attention_mask=attention_mask, mhc_recompute_manager=manager
+ )
+ if is_last:
+ manager.discard_all_outputs_and_register_unified_recompute(h)
+
+ # Backward pass
+ loss = h.sum()
+ loss.backward()
+
+ # Verify gradients
+ assert hidden_states.grad is not None
+ assert hidden_states.grad.shape == hidden_states.shape
+ # Check that gradient is non-trivial (not all zeros)
+ assert hidden_states.grad.abs().sum() > 0
+
+
+class TestMHCRecomputeMemorySaving:
+ """Verify that 'mhc' in recompute_modules actually reduces peak GPU memory."""
+
+ def setup_method(self, method):
+ Utils.initialize_model_parallel(1, 1)
+ model_parallel_cuda_manual_seed(123)
+
+ def teardown_method(self, method):
+ Utils.destroy_model_parallel()
+
+ @staticmethod
+ def _run_forward_backward(
+ num_layers,
+ hidden_size,
+ num_streams,
+ seq_len,
+ batch_size,
+ use_recompute,
+ recompute_block_size=2,
+ ):
+ """Run a full forward + backward pass and return (peak memory, output grad).
+
+ When use_recompute=True, a new CheckpointManager is created every
+ `recompute_block_size` layers, mirroring TransformerBlock's
+ _build_mhc_recompute_layer_plan logic.
+ """
+ config = _make_mhc_config(
+ hidden_size=hidden_size,
+ num_streams=num_streams,
+ num_layers=num_layers,
+ recompute_modules=["core_attn", "mhc"] if use_recompute else None,
+ recompute_granularity='selective' if use_recompute else None,
+ )
+ layer_spec = get_gpt_layer_with_transformer_engine_spec(enable_hyper_connection=True)
+ layers = [
+ HyperConnectionTransformerLayer(
+ config, layer_spec.submodules, layer_number=i + 1
+ ).cuda()
+ for i in range(num_layers)
+ ]
+ for layer in layers:
+ layer.train()
+
+ n_channels = num_streams * hidden_size
+ hidden_states = torch.randn(
+ seq_len, batch_size, n_channels, device='cuda', requires_grad=True
+ )
+ attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda')
+
+ torch.cuda.reset_peak_memory_stats()
+ torch.cuda.synchronize()
+
+ manager = CheckpointManager() if use_recompute else None
+
+ h = hidden_states
+ for i, layer in enumerate(layers):
+ is_last_in_block = (i == num_layers - 1) or ((i + 1) % recompute_block_size == 0)
+ kwargs = dict(hidden_states=h, attention_mask=attention_mask)
+ if manager is not None:
+ manager.is_last_layer_in_recompute_block = is_last_in_block
+ kwargs['mhc_recompute_manager'] = manager
+ h, _ = layer(**kwargs)
+ if manager is not None and is_last_in_block:
+ manager.discard_all_outputs_and_register_unified_recompute(h)
+ if i < num_layers - 1:
+ manager = CheckpointManager()
+
+ loss = h.sum()
+ loss.backward()
+ torch.cuda.synchronize()
+
+ peak_mem = torch.cuda.max_memory_allocated()
+ grad = hidden_states.grad.clone()
+
+ del layers, hidden_states, h, loss, manager
+ torch.cuda.empty_cache()
+
+ return peak_mem, grad
+
+ def test_recompute_reduces_peak_memory(self):
+ """Peak memory with recompute (block_size=2) should be lower than without."""
+ num_layers = 8
+ hidden_size = 128
+ num_streams = 4
+ seq_len = 64
+ batch_size = 4
+
+ peak_no_recompute, _ = self._run_forward_backward(
+ num_layers, hidden_size, num_streams, seq_len, batch_size, use_recompute=False
+ )
+ peak_recompute, _ = self._run_forward_backward(
+ num_layers,
+ hidden_size,
+ num_streams,
+ seq_len,
+ batch_size,
+ use_recompute=True,
+ recompute_block_size=2,
+ )
+
+ saving_pct = (peak_no_recompute - peak_recompute) / peak_no_recompute * 100
+
+ assert peak_recompute < peak_no_recompute, (
+ f"Recompute should reduce peak memory, but got "
+ f"no_recompute={peak_no_recompute / 1e6:.1f}MB vs "
+ f"recompute={peak_recompute / 1e6:.1f}MB "
+ f"(saving={saving_pct:.1f}%)"
+ )
+
+
+class TestMHCWithCudaGraph:
+ """Test HyperConnectionTransformerLayer compatibility with CUDA graphs.
+
+ CUDA graph capture requires static computation graphs and fixed tensor shapes.
+ These tests verify that the mHC layer properly supports the CUDA graph interface
+ defined in GraphableMegatronModule and TransformerLayer.
+ """
+
+ def setup_method(self, method):
+ Utils.initialize_model_parallel(1, 1)
+ model_parallel_cuda_manual_seed(123, use_cudagraphable_rng=True, force_reset_rng=True)
+
+ def teardown_method(self, method):
+ Utils.destroy_model_parallel()
+
+ def _create_mhc_layer(self, hidden_size=64, num_streams=4, **extra_config):
+ config = _make_mhc_config(hidden_size=hidden_size, num_streams=num_streams, **extra_config)
+ layer_spec = get_gpt_layer_with_transformer_engine_spec(enable_hyper_connection=True)
+ layer = HyperConnectionTransformerLayer(config, layer_spec.submodules)
+ layer.cuda()
+ return layer, config
+
+ def test_get_layer_static_inputs_shape_for_mhc(self):
+ """get_layer_static_inputs must return [s, b, n*C] for mHC layers.
+
+ CUDA graph capture creates static buffers whose shapes are determined by
+ this method. If the shape is [s, b, C] instead of [s, b, n*C], the graph
+ capture will produce a shape mismatch at the first hyper connection module.
+ """
+ layer, config = self._create_mhc_layer()
+ seq_length = 32
+ micro_batch_size = 2
+
+ static_inputs = layer.get_layer_static_inputs(seq_length, micro_batch_size)
+ hidden_states = static_inputs["hidden_states"]
+
+ expected_hidden_dim = config.num_residual_streams * config.hidden_size
+ assert hidden_states.shape[-1] == expected_hidden_dim, (
+ f"get_layer_static_inputs returns hidden dim {hidden_states.shape[-1]} "
+ f"but mHC expects {expected_hidden_dim} (n={config.num_residual_streams} * "
+ f"C={config.hidden_size}). "
+ f"HyperConnectionTransformerLayer must override get_layer_static_inputs."
+ )
+
+ def test_submodules_under_cudagraphs_includes_hyper_connection(self):
+ """_get_submodules_under_cudagraphs must include hyper connection modules.
+
+ CUDA graph manual hooks are set up for parameters of submodules returned
+ by this method. Missing hyper connection modules means their parameters
+ (mapping_proj, alpha_*, bias) will not get proper pre-forward hooks during
+ graph replay, leading to stale parameter values.
+ """
+ layer, config = self._create_mhc_layer()
+
+ submodules = layer._get_submodules_under_cudagraphs()
+
+ hc_modules_found = any(
+ hasattr(m, 'mapping_proj') for submod in submodules for m in submod.modules()
+ )
+ assert hc_modules_found, (
+ "_get_submodules_under_cudagraphs does not include HyperConnectionModule. "
+ "Parameters like mapping_proj, alpha_pre/post/res will not be updated "
+ "during CUDA graph replay."
+ )
+
+ def test_forward_through_te_cuda_graph_capture_path(self):
+ """_te_cuda_graph_capture must produce correct output shapes for mHC.
+
+ TE CUDA graph capture calls _te_cuda_graph_capture() during warmup.
+ For mHC layers, the input must be n-stream [s, b, n*C] and output must
+ also be [s, b, n*C].
+ """
+ layer, config = self._create_mhc_layer()
+ layer.eval()
+
+ seq_len = 8
+ batch_size = 2
+ n_channels = config.num_residual_streams * config.hidden_size
+
+ hidden_states = torch.randn(seq_len, batch_size, n_channels, device='cuda')
+ attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda')
+
+ with torch.no_grad():
+ outputs = layer._te_cuda_graph_capture(
+ hidden_states=hidden_states, attention_mask=attention_mask
+ )
+
+ if isinstance(outputs, tuple):
+ output = outputs[0]
+ else:
+ output = outputs
+
+ assert output.shape == (seq_len, batch_size, n_channels), (
+ f"_te_cuda_graph_capture output shape {output.shape} != "
+ f"expected {(seq_len, batch_size, n_channels)}"
+ )
+
+ def test_cuda_graph_fwd_bwd_with_hyper_connection(self):
+ """End-to-end CUDA graph capture and replay for forward+backward with mHC.
+
+ Captures both the forward and backward pass of HyperConnectionTransformerLayer
+ into a torch.cuda.CUDAGraph and replays it with fresh input data, verifying
+ that the computation graph is fully static (capturable) and produces correct
+ output shapes and non-trivial gradients.
+ """
+ layer, config = self._create_mhc_layer()
+ layer.train()
+
+ seq_len = 8
+ batch_size = 2
+ n_channels = config.num_residual_streams * config.hidden_size
+
+ static_input = torch.randn(
+ seq_len, batch_size, n_channels, device='cuda', requires_grad=True
+ )
+ attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda')
+
+ # Warmup on side stream to trigger lazy allocations
+ s = torch.cuda.Stream()
+ s.wait_stream(torch.cuda.current_stream())
+ with torch.cuda.stream(s):
+ for _ in range(3):
+ out, _ = layer(hidden_states=static_input, attention_mask=attention_mask)
+ out.sum().backward()
+ torch.cuda.current_stream().wait_stream(s)
+
+ # Set .grad to None so backward allocates fresh gradient tensors in the
+ # graph's private memory pool during capture.
+ layer.zero_grad(set_to_none=True)
+ static_input.grad = None
+
+ g = torch.cuda.CUDAGraph()
+ with torch.cuda.graph(g):
+ output, _ = layer(hidden_states=static_input, attention_mask=attention_mask)
+ output.sum().backward()
+
+ # Replay with new input data.
+ # Use no_grad because backward inside the captured graph already
+ # bumped the autograd version counter on static_input, making
+ # in-place copy_ illegal without disabling grad tracking.
+ with torch.no_grad():
+ static_input.copy_(torch.randn_like(static_input))
+ g.replay()
+
+ assert output.shape == (
+ seq_len,
+ batch_size,
+ n_channels,
+ ), f"Output shape {output.shape} != expected {(seq_len, batch_size, n_channels)}"
+ assert (
+ static_input.grad is not None
+ ), "Gradients should be computed for static_input after graph replay"
+ assert static_input.grad.shape == static_input.shape
+ assert static_input.grad.abs().sum() > 0, "Gradients should be non-trivial"
+
+ # Verify numerical consistency: graph replay should match eager execution
+ # with the same input and weights.
+ test_data = torch.randn(seq_len, batch_size, n_channels, device='cuda')
+
+ with torch.no_grad():
+ static_input.copy_(test_data)
+ g.replay()
+ graph_out = output.detach().clone()
+ graph_grad = static_input.grad.detach().clone()
+
+ eager_input = test_data.clone().requires_grad_(True)
+ eager_output, _ = layer(hidden_states=eager_input, attention_mask=attention_mask)
+ eager_output.sum().backward()
+
+ assert torch.allclose(graph_out, eager_output.detach(), atol=1e-5), (
+ f"Graph vs eager output mismatch: "
+ f"max diff = {(graph_out - eager_output.detach()).abs().max().item()}"
+ )
+ assert torch.allclose(graph_grad, eager_input.grad, atol=1e-5), (
+ f"Graph vs eager gradient mismatch: "
+ f"max diff = {(graph_grad - eager_input.grad).abs().max().item()}"
+ )
+
+ def test_cuda_graph_fwd_bwd_with_hyper_connection_and_recompute(self):
+ """CUDA graph capture+replay for fwd+bwd with mHC and CheckpointManager.
+
+ When a CheckpointManager is used, additional CheckpointWithoutOutput
+ objects are created for layernorm and hyper-connection operations. The
+ manager discards intermediate activations during forward (storage.resize_(0))
+ and recomputes them during backward via a unified gradient hook.
+ This test verifies the full capture+replay still works correctly.
+ """
+ layer, config = self._create_mhc_layer()
+ layer.train()
+
+ seq_len = 8
+ batch_size = 2
+ n_channels = config.num_residual_streams * config.hidden_size
+
+ static_input = torch.randn(
+ seq_len, batch_size, n_channels, device='cuda', requires_grad=True
+ )
+ attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda')
+
+ # Warmup on side stream; fresh manager per iteration to avoid stale state.
+ s = torch.cuda.Stream()
+ s.wait_stream(torch.cuda.current_stream())
+ with torch.cuda.stream(s):
+ for _ in range(3):
+ mgr = CheckpointManager()
+ mgr.is_last_layer_in_recompute_block = True
+ out, _ = layer(
+ hidden_states=static_input,
+ attention_mask=attention_mask,
+ mhc_recompute_manager=mgr,
+ )
+ mgr.discard_all_outputs_and_register_unified_recompute(out)
+ out.sum().backward()
+ torch.cuda.current_stream().wait_stream(s)
+
+ layer.zero_grad(set_to_none=True)
+ static_input.grad = None
+
+ capture_mgr = CheckpointManager()
+ capture_mgr.is_last_layer_in_recompute_block = True
+
+ g = torch.cuda.CUDAGraph()
+ with torch.cuda.graph(g):
+ output, _ = layer(
+ hidden_states=static_input,
+ attention_mask=attention_mask,
+ mhc_recompute_manager=capture_mgr,
+ )
+ capture_mgr.discard_all_outputs_and_register_unified_recompute(output)
+ output.sum().backward()
+
+ # Replay with new input data.
+ with torch.no_grad():
+ static_input.copy_(torch.randn_like(static_input))
+ g.replay()
+
+ assert output.shape == (
+ seq_len,
+ batch_size,
+ n_channels,
+ ), f"Output shape {output.shape} != expected {(seq_len, batch_size, n_channels)}"
+ assert (
+ static_input.grad is not None
+ ), "Gradients should be computed for static_input after graph replay"
+ assert static_input.grad.shape == static_input.shape
+ assert static_input.grad.abs().sum() > 0, "Gradients should be non-trivial"
+
+ # Numerical consistency: graph replay vs eager with the same input.
+ test_data = torch.randn(seq_len, batch_size, n_channels, device='cuda')
+
+ with torch.no_grad():
+ static_input.copy_(test_data)
+ g.replay()
+ graph_out = output.detach().clone()
+ graph_grad = static_input.grad.detach().clone()
+
+ eager_mgr = CheckpointManager()
+ eager_mgr.is_last_layer_in_recompute_block = True
+ eager_input = test_data.clone().requires_grad_(True)
+ eager_output, _ = layer(
+ hidden_states=eager_input,
+ attention_mask=attention_mask,
+ mhc_recompute_manager=eager_mgr,
+ )
+ eager_mgr.discard_all_outputs_and_register_unified_recompute(eager_output)
+ eager_output.sum().backward()
+
+ assert torch.allclose(graph_out, eager_output.detach(), atol=1e-5), (
+ f"Graph vs eager output mismatch: "
+ f"max diff = {(graph_out - eager_output.detach()).abs().max().item()}"
+ )
+ assert torch.allclose(graph_grad, eager_input.grad, atol=1e-5), (
+ f"Graph vs eager gradient mismatch: "
+ f"max diff = {(graph_grad - eager_input.grad).abs().max().item()}"
+ )
+
+ def test_mcore_cudagraph_manager_with_mhc_recompute_manager(self):
+ """MCore CudaGraphManager must not crash on mhc_recompute_manager kwarg.
+
+ When cuda_graph_impl="local" is set, TransformerLayer.__call__ routes
+ through MegatronModule.__call__ → CudaGraphManager.__call__, which
+ iterates over all kwargs to check supported types. CheckpointManager
+ (used by mhc_recompute_manager) is not a CUDA-graph-supported type.
+
+ This test verifies that mhc_recompute_manager is properly extracted
+ from kwargs before the CudaGraphManager sees them, preventing the
+ AssertionError that would otherwise occur.
+ """
+ layer, config = self._create_mhc_layer(cuda_graph_impl="local", cuda_graph_scope="attn")
+ layer.train()
+
+ assert hasattr(
+ layer, 'cudagraph_manager'
+ ), "Layer should have cudagraph_manager with cuda_graph_impl='local'"
+
+ seq_len = 8
+ batch_size = 2
+ n_channels = config.num_residual_streams * config.hidden_size
+
+ hidden_states = torch.randn(
+ seq_len, batch_size, n_channels, device='cuda', requires_grad=True
+ )
+ attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda')
+
+ mgr = CheckpointManager()
+ mgr.is_last_layer_in_recompute_block = True
+
+ output, context = layer(
+ hidden_states=hidden_states, attention_mask=attention_mask, mhc_recompute_manager=mgr
+ )
+
+ assert output.shape == (seq_len, batch_size, n_channels)
+
+ def test_mcore_cudagraph_manager_without_mhc_recompute_manager(self):
+ """MCore CudaGraphManager path works when mhc_recompute_manager is None."""
+ layer, config = self._create_mhc_layer(cuda_graph_impl="local", cuda_graph_scope="attn")
+ layer.train()
+
+ seq_len = 8
+ batch_size = 2
+ n_channels = config.num_residual_streams * config.hidden_size
+
+ hidden_states = torch.randn(
+ seq_len, batch_size, n_channels, device='cuda', requires_grad=True
+ )
+ attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda')
+
+ output, context = layer(hidden_states=hidden_states, attention_mask=attention_mask)
+
+ assert output.shape == (seq_len, batch_size, n_channels)
+
+
+class TestMHCWithOffloading:
+ """Test HyperConnectionTransformerLayer with fine-grained activation offloading.
+
+ Fine-grained activation offloading transfers specific activations (e.g., layernorm
+ inputs) to CPU during forward and reloads them during backward. These tests verify
+ that the mHC layer's multi-stream architecture works correctly with offloading.
+ """
+
+ def setup_method(self, method):
+ Utils.initialize_model_parallel(1, 1)
+ model_parallel_cuda_manual_seed(123)
+
+ def teardown_method(self, method):
+ Utils.destroy_model_parallel()
+
+ def _create_mhc_layer_with_offloading(
+ self, hidden_size=64, num_streams=4, offload_modules=None
+ ):
+ if offload_modules is None:
+ offload_modules = ["attn_norm", "mlp_norm"]
+
+ config = _make_mhc_config(
+ hidden_size=hidden_size,
+ num_streams=num_streams,
+ fine_grained_activation_offloading=True,
+ offload_modules=offload_modules,
+ )
+ layer_spec = get_gpt_layer_with_transformer_engine_spec(enable_hyper_connection=True)
+ layer = HyperConnectionTransformerLayer(config, layer_spec.submodules)
+ layer.cuda()
+ return layer, config
+
+ def test_forward_backward_with_offloading(self):
+ """Forward+backward should work with activation offloading enabled.
+
+ This exercises the off_interface context manager around layernorms in
+ the mHC forward path, including the group_commit that commits the
+ offloading group for the aggregated 1-stream layernorm input.
+ """
+ from megatron.core.pipeline_parallel.fine_grained_activation_offload import (
+ PipelineOffloadManager,
+ )
+
+ layer, config = self._create_mhc_layer_with_offloading()
+ layer.train()
+
+ seq_len = 8
+ batch_size = 2
+ n_channels = config.num_residual_streams * config.hidden_size
+
+ hidden_states = torch.randn(
+ seq_len, batch_size, n_channels, device='cuda', requires_grad=True
+ )
+ attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda')
+
+ mgr = PipelineOffloadManager.get_instance()
+ mgr.init_model_chunk_offload_handler(
+ pp_rank=0, vp_size=1, vp_stage=0, min_offloaded_tensor_size=0
+ )
+
+ output, context = layer(hidden_states=hidden_states, attention_mask=attention_mask)
+
+ assert output.shape == (
+ seq_len,
+ batch_size,
+ n_channels,
+ ), f"Output shape {output.shape} != expected {(seq_len, batch_size, n_channels)}"
+
+ loss = output.sum()
+ loss.backward()
+
+ assert hidden_states.grad is not None, "Gradients should flow through offloaded path"
+ assert hidden_states.grad.shape == hidden_states.shape
+ assert hidden_states.grad.abs().sum() > 0, "Gradients should be non-trivial"
+
+ PipelineOffloadManager.reset_instance()
+
+ def test_offloading_numerical_equivalence(self):
+ """Offloaded forward+backward must produce the same result as non-offloaded.
+
+ Compares outputs and gradients between a layer with offloading disabled
+ vs enabled to ensure the offloading path does not corrupt activations.
+ """
+ from megatron.core.pipeline_parallel.fine_grained_activation_offload import (
+ PipelineOffloadManager,
+ )
+
+ PipelineOffloadManager.reset_instance()
+
+ hidden_size = 64
+ num_streams = 4
+ seq_len = 8
+ batch_size = 2
+ n_channels = num_streams * hidden_size
+
+ torch.manual_seed(42)
+ input_data = torch.randn(seq_len, batch_size, n_channels, device='cuda')
+ attention_mask = torch.ones((1, 1, seq_len, seq_len), dtype=bool, device='cuda')
+
+ # Run without offloading
+ config_no_offload = _make_mhc_config(hidden_size=hidden_size, num_streams=num_streams)
+ layer_spec = get_gpt_layer_with_transformer_engine_spec(enable_hyper_connection=True)
+ layer_no_offload = HyperConnectionTransformerLayer(
+ config_no_offload, layer_spec.submodules
+ ).cuda()
+ layer_no_offload.train()
+
+ h1 = input_data.clone().detach().requires_grad_(True)
+ out1, _ = layer_no_offload(hidden_states=h1, attention_mask=attention_mask)
+ out1.sum().backward()
+ grad_no_offload = h1.grad.clone()
+ out1_detached = out1.detach().clone()
+
+ # Run with offloading using the same weights
+ config_offload = _make_mhc_config(
+ hidden_size=hidden_size,
+ num_streams=num_streams,
+ fine_grained_activation_offloading=True,
+ offload_modules=["attn_norm", "mlp_norm"],
+ )
+ layer_offload = HyperConnectionTransformerLayer(
+ config_offload, layer_spec.submodules
+ ).cuda()
+ layer_offload.load_state_dict(layer_no_offload.state_dict())
+ layer_offload.train()
+
+ mgr = PipelineOffloadManager.get_instance()
+ mgr.init_model_chunk_offload_handler(
+ pp_rank=0, vp_size=1, vp_stage=0, min_offloaded_tensor_size=0
+ )
+
+ h2 = input_data.clone().detach().requires_grad_(True)
+ out2, _ = layer_offload(hidden_states=h2, attention_mask=attention_mask)
+ out2.sum().backward()
+ grad_offload = h2.grad.clone()
+
+ PipelineOffloadManager.reset_instance()
+
+ assert torch.allclose(out1_detached, out2.detach(), atol=1e-5), (
+ f"Forward outputs differ: max diff = "
+ f"{(out1_detached - out2.detach()).abs().max().item()}"
+ )
+ assert torch.allclose(grad_no_offload, grad_offload, atol=1e-5), (
+ f"Gradients differ: max diff = "
+ f"{(grad_no_offload - grad_offload).abs().max().item()}"
+ )
diff --git a/uv.lock b/uv.lock
index 62a8227e803..a7399c6c16a 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,20 +1,25 @@
version = 1
-revision = 2
+revision = 3
requires-python = ">=3.10"
resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
"python_full_version >= '3.14' and sys_platform == 'win32'",
"python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'win32'",
"python_full_version == '3.12.*' and sys_platform == 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'emscripten'",
"python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
"python_full_version == '3.11.*' and sys_platform == 'win32'",
"python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version < '3.11'",
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
]
conflicts = [[
{ package = "megatron-core", extra = "dev" },
@@ -39,21 +44,21 @@ wheels = [
[[package]]
name = "accelerate"
-version = "1.13.0"
+version = "1.12.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "huggingface-hub" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
{ name = "packaging" },
{ name = "psutil" },
{ name = "pyyaml" },
{ name = "safetensors" },
{ name = "torch", marker = "sys_platform == 'never' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/ca/14/787e5498cd062640f0f3d92ef4ae4063174f76f9afd29d13fc52a319daae/accelerate-1.13.0.tar.gz", hash = "sha256:d631b4e0f5b3de4aff2d7e9e6857d164810dfc3237d54d017f075122d057b236", size = 402835, upload-time = "2026-03-04T19:34:12.359Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/4a/8e/ac2a9566747a93f8be36ee08532eb0160558b07630a081a6056a9f89bf1d/accelerate-1.12.0.tar.gz", hash = "sha256:70988c352feb481887077d2ab845125024b2a137a5090d6d7a32b57d03a45df6", size = 398399, upload-time = "2025-11-21T11:27:46.973Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/7e/46/02ac5e262d4af18054b3e922b2baedbb2a03289ee792162de60a865defc5/accelerate-1.13.0-py3-none-any.whl", hash = "sha256:cf1a3efb96c18f7b152eb0fa7490f3710b19c3f395699358f08decca2b8b62e0", size = 383744, upload-time = "2026-03-04T19:34:10.313Z" },
+ { url = "https://files.pythonhosted.org/packages/9f/d2/c581486aa6c4fbd7394c23c47b83fa1a919d34194e16944241daf9e762dd/accelerate-1.12.0-py3-none-any.whl", hash = "sha256:3e2091cd341423207e2f084a6654b1efcd250dc326f2a37d6dde446e07cabb11", size = 380935, upload-time = "2025-11-21T11:27:44.522Z" },
]
[[package]]
@@ -70,7 +75,7 @@ wheels = [
[[package]]
name = "aiobotocore"
-version = "3.2.1"
+version = "2.26.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "aiohttp" },
@@ -79,12 +84,11 @@ dependencies = [
{ name = "jmespath" },
{ name = "multidict" },
{ name = "python-dateutil" },
- { name = "typing-extensions", marker = "python_full_version < '3.11'" },
- { name = "wrapt" },
+ { name = "wrapt", version = "1.17.3", source = { registry = "https://pypi.org/simple" } },
]
-sdist = { url = "https://files.pythonhosted.org/packages/1d/ce/7d593e50d481b649c99a407c8249f9cf6437840a3adc4ecc9127f9a843d2/aiobotocore-3.2.1.tar.gz", hash = "sha256:59b1c1f59860cb10b2e5096edcc87a88842bee301969bd76a3ca0b1c4c30e6d3", size = 122788, upload-time = "2026-03-04T22:30:43.342Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/4d/f8/99fa90d9c25b78292899fd4946fce97b6353838b5ecc139ad8ba1436e70c/aiobotocore-2.26.0.tar.gz", hash = "sha256:50567feaf8dfe2b653570b4491f5bc8c6e7fb9622479d66442462c021db4fadc", size = 122026, upload-time = "2025-11-28T07:54:59.956Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/02/78/79aa8169408996f5a71150abdea2e5e0f364df250c9e54ac385f115c7436/aiobotocore-3.2.1-py3-none-any.whl", hash = "sha256:68b7474af3e7124666b8e191805db5a7255d14e6187e0472481c845b6062e42e", size = 87737, upload-time = "2026-03-04T22:30:41.594Z" },
+ { url = "https://files.pythonhosted.org/packages/b7/58/3bf0b7d474607dc7fd67dd1365c4e0f392c8177eaf4054e5ddee3ebd53b5/aiobotocore-2.26.0-py3-none-any.whl", hash = "sha256:a793db51c07930513b74ea7a95bd79aaa42f545bdb0f011779646eafa216abec", size = 87333, upload-time = "2025-11-28T07:54:58.457Z" },
]
[[package]]
@@ -318,37 +322,37 @@ wheels = [
[[package]]
name = "apache-tvm-ffi"
-version = "0.1.9"
+version = "0.1.8.post2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "typing-extensions" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/6f/60/1e787a0b5ebf318483235be2a689ee367173983067e441b8379564f667c0/apache_tvm_ffi-0.1.9.tar.gz", hash = "sha256:d2d402587e8906de0a07f4746aa78f3d452c7efe3625d4bb39ac2ad693bce530", size = 2513731, upload-time = "2026-02-27T19:28:06.602Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/56/3d/4594c14de64e92697a91eec8ac6518ad72a3f30776aff432e68c2c6d9d3d/apache_tvm_ffi-0.1.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d911cbbc83bf12a0d9ec03e5315ff1bb92d95702fe912cd7a050393274382e71", size = 2068752, upload-time = "2026-02-27T19:27:03.001Z" },
- { url = "https://files.pythonhosted.org/packages/83/0a/827e4f9ae85e1be3037818abd59566d906ba1fe27295c6938b12cc482151/apache_tvm_ffi-0.1.9-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1c8dd4018420c0d14bace688594710909ce198056ff8ac2ad1cd462b30fe1bdd", size = 2231204, upload-time = "2026-02-27T19:27:04.734Z" },
- { url = "https://files.pythonhosted.org/packages/ae/b6/f1ec5c528918c4dae03885ec472663072a984431d7d7fb04ca0798a2e13c/apache_tvm_ffi-0.1.9-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7f6bc8846d570b8ce38692fc91b530b44cd6ae092c805a844da23970e81b12c0", size = 2323684, upload-time = "2026-02-27T19:27:06.284Z" },
- { url = "https://files.pythonhosted.org/packages/28/08/818836fbc0f198da1597896f82d7e6556bf5678cd5150d633214bf14b718/apache_tvm_ffi-0.1.9-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f3ec9149f207a7af3ea3531cad7a0b0d04ded06df4f51a547479d5eb489428dd", size = 2160066, upload-time = "2026-02-27T19:27:07.897Z" },
- { url = "https://files.pythonhosted.org/packages/c8/6b/2e7d73d055523c2fb31394cd3d55593969a0680619e1c939c2128c2fdd36/apache_tvm_ffi-0.1.9-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eefcd17f61bf503ff0f4ad429e03ef6c241c7d13682f58281d883218b854c9bd", size = 2307014, upload-time = "2026-02-27T19:27:10.287Z" },
- { url = "https://files.pythonhosted.org/packages/c1/9d/9b99efbeaaed4c78a2b7cfeda6b8fc7d6249616938c05ae0248aa0bf0d56/apache_tvm_ffi-0.1.9-cp310-cp310-win_amd64.whl", hash = "sha256:dd58da01331826fbe6c064d6f0c9bbc2d62883b78df8d15baa8ea21d37507e4d", size = 1993158, upload-time = "2026-02-27T19:27:11.884Z" },
- { url = "https://files.pythonhosted.org/packages/b0/44/130571cede8704b1412e48b3dd78de41b4d31b68241f954743d1a9925bd9/apache_tvm_ffi-0.1.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:932d94e29595a47109f0ef6e0b4209a934451582954ea8b426e758d6b3e307e3", size = 2070368, upload-time = "2026-02-27T19:27:13.779Z" },
- { url = "https://files.pythonhosted.org/packages/42/b1/9f2cfd6d49b03c5d4ec5c12548d911e2e01265be783f343103b4df716765/apache_tvm_ffi-0.1.9-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c0449fc3802987c3652bea266ffda2934a6f69c80bba791a3f55b91040656a18", size = 2231154, upload-time = "2026-02-27T19:27:15.691Z" },
- { url = "https://files.pythonhosted.org/packages/55/43/63faedea83494e99122466a993bcdccd31cf93c7e8a0d56731120e82e2b9/apache_tvm_ffi-0.1.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6f16d73a82a9e68a439b7d233d48b1b929be17fe92df4bbf1ee2274e573144a3", size = 2323130, upload-time = "2026-02-27T19:27:17.259Z" },
- { url = "https://files.pythonhosted.org/packages/27/96/d735bc4c528efaf0a8a954076963c727aad2dde8577641aa9025ec4f2d52/apache_tvm_ffi-0.1.9-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:01ebb1308b2666c206aa9a4015eb48f03a5d98ea2e9cfb002bd5e2ca0b9c7ef3", size = 2159854, upload-time = "2026-02-27T19:27:18.789Z" },
- { url = "https://files.pythonhosted.org/packages/e4/3b/6cfc82a3ab5d9e501bbcee5df36eebe09da1c384461d7a55e2a17776d117/apache_tvm_ffi-0.1.9-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:21365abd2a2a1a6d3b4e6e4f048309651125becfa795440c3607f3cc27d30ac7", size = 2307140, upload-time = "2026-02-27T19:27:20.222Z" },
- { url = "https://files.pythonhosted.org/packages/5f/61/3ffe1fe3190e12807a12b72ed0d291c7f66569c2e7c3571fde18175f19e1/apache_tvm_ffi-0.1.9-cp311-cp311-win_amd64.whl", hash = "sha256:9ee710a9fba3d9ff9747870bbd7e2175eb8d5b9c791f17fd645f35f6dab3f8aa", size = 1993218, upload-time = "2026-02-27T19:27:22.043Z" },
- { url = "https://files.pythonhosted.org/packages/df/f2/b8c4b151169f6d7ba8773c8af68b2e0c1013d7fb3f1bdf87573f47157ce9/apache_tvm_ffi-0.1.9-cp312-abi3-macosx_11_0_arm64.whl", hash = "sha256:49e52350b0470654847de752e65603b604a4d3323e7e9f5e8a982f44acc4c143", size = 2041756, upload-time = "2026-02-27T19:27:23.931Z" },
- { url = "https://files.pythonhosted.org/packages/a7/c0/6d3d54f50012255b41bc3e24944c086f63c4707c8686c7c6780e9283eb96/apache_tvm_ffi-0.1.9-cp312-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d503029e66c43b1a1cb1a42a1e9bb428c8a28dcbdec31c28e705472ca648a3a", size = 2203712, upload-time = "2026-02-27T19:27:25.867Z" },
- { url = "https://files.pythonhosted.org/packages/c6/dd/2bab4c6cd86257dbf99e93452a1af833113f8dc3e25a25579f6e4e4c8a94/apache_tvm_ffi-0.1.9-cp312-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28241371934ea8af10d5067087ba1229ebddded7b2c02d33a258ec2a96df8c46", size = 2299704, upload-time = "2026-02-27T19:27:27.477Z" },
- { url = "https://files.pythonhosted.org/packages/7a/4a/b469bcb2e1014cb84d336d2a59f42958a058251c577a4c2680cacad346e2/apache_tvm_ffi-0.1.9-cp312-abi3-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:87cacce81df55685fc6a76e1e3c5db1200e85e87bf5974b692c59d131b7bc622", size = 2130865, upload-time = "2026-02-27T19:27:29.092Z" },
- { url = "https://files.pythonhosted.org/packages/70/ef/5402da5d37f5270fd88ea0348acca78dba9be8bdbf6c2bcae0935eb03ef1/apache_tvm_ffi-0.1.9-cp312-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f45eb43499acac45ff6c93564f0ff2d3ca27b69656d540fd56ce59d51c0b4c65", size = 2278991, upload-time = "2026-02-27T19:27:30.729Z" },
- { url = "https://files.pythonhosted.org/packages/b5/23/1b7dc5f0807f83098183a57db6ee85b2c93b646d74a6e03781c9208aaeb0/apache_tvm_ffi-0.1.9-cp312-abi3-win_amd64.whl", hash = "sha256:d1dcf4c041d5ec05e3da1d545800c33cdbb95c113baa7705085ff79fa262752b", size = 1973200, upload-time = "2026-02-27T19:27:32.367Z" },
- { url = "https://files.pythonhosted.org/packages/4a/1e/991ae65e64ce132c1ba665562db6638f5696d6133f580e20c653de33b9af/apache_tvm_ffi-0.1.9-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c3349f72ddb8ce206472d0380a729f213017a2180707096f8d57114b81097dd1", size = 2072944, upload-time = "2026-02-27T19:27:34.261Z" },
- { url = "https://files.pythonhosted.org/packages/b4/a7/1e0643949e683fb3cfababd87058c0cfef122d1a3bb6ce703f719051b842/apache_tvm_ffi-0.1.9-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d1f4d2b7ec7b1213632e9a104e9330bfc3dec48decffa62114c33aa188c9f43a", size = 2215954, upload-time = "2026-02-27T19:27:35.872Z" },
- { url = "https://files.pythonhosted.org/packages/d6/06/5016191ab61d2db4c3a7d754a3c1184e0836f575a7d08491669738c5e4b9/apache_tvm_ffi-0.1.9-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e4f01d16ba53fe118e363f7257253f07003797e4abe6fc9567f23b6a930dbff2", size = 2307291, upload-time = "2026-02-27T19:27:37.527Z" },
- { url = "https://files.pythonhosted.org/packages/e3/f5/40bf0667330938efbfc0a51743cc53c79e41b4ece1a8abad3076192c9674/apache_tvm_ffi-0.1.9-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3c0581dd6bfbce7b017ef85cfda08bbe38891cc4b3afbcfaa8bc2d383728e426", size = 2143850, upload-time = "2026-02-27T19:27:40.437Z" },
- { url = "https://files.pythonhosted.org/packages/72/4a/421cbd4ed32e8bad3b88af3e8fa145c1f6f493bdd05be15b6f2d9b3cb7d6/apache_tvm_ffi-0.1.9-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7dfa14be2a49347791ef21222a8225ce7f99bfec17104a676cb4f1bf3a107088", size = 2289038, upload-time = "2026-02-27T19:27:41.972Z" },
- { url = "https://files.pythonhosted.org/packages/9d/1a/c8923d819b49872a612033b90d29299c0be73a7cbed1ddb3dc78dfe5e9f1/apache_tvm_ffi-0.1.9-cp314-cp314t-win_amd64.whl", hash = "sha256:a42d7ca27dce83efbdce7ec970fe3e773a69c31d928730ee5d9badb1229d106c", size = 2039007, upload-time = "2026-02-27T19:27:43.618Z" },
+sdist = { url = "https://files.pythonhosted.org/packages/e3/e9/a13952726228fa6282154ecf927092396bc759739e5e045019f6ab92f3ca/apache_tvm_ffi-0.1.8.post2.tar.gz", hash = "sha256:4513e38852894f290172ecfefcbc18d34e817fd29c16a0f1770e130c82b4067e", size = 2441111, upload-time = "2026-01-13T18:11:27.864Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/cd/65/0c67653e6431716f2706e29f2e2e1ce9a6f9d9f7615c0c637a4881c3f5a5/apache_tvm_ffi-0.1.8.post2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e11e03c865297c65c2f206c90b8014890bc52a3059d8148b47cd2c2759bcea90", size = 1838436, upload-time = "2026-01-13T18:10:22.334Z" },
+ { url = "https://files.pythonhosted.org/packages/46/8f/13fe7acbd7497312fda5faf51545fcb50c0ed5398cfe525d006ba29f1b9b/apache_tvm_ffi-0.1.8.post2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e855f2b3f60ec16939b00e1b594ce7f488f96e387b12547e98643177f70ab2b1", size = 1996102, upload-time = "2026-01-13T18:10:23.97Z" },
+ { url = "https://files.pythonhosted.org/packages/cc/f8/b469a4d91ea74f627cb220835049fb60a566f7427f27c9f66c6c54a287b6/apache_tvm_ffi-0.1.8.post2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:854ecd90a1039d542c531fa6a4928f5633452aedf1ed7f646f3bbbeca8217156", size = 2069067, upload-time = "2026-01-13T18:10:25.425Z" },
+ { url = "https://files.pythonhosted.org/packages/d0/88/663e532e7ba625a3998724ae0207ce620c32a057c339b4e4ae0be6810d85/apache_tvm_ffi-0.1.8.post2-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e1894b6f9c2b45bc9df8e407d041e575128591b998ced09f974675d2bb6b8bc9", size = 1939413, upload-time = "2026-01-13T18:10:28.61Z" },
+ { url = "https://files.pythonhosted.org/packages/ee/16/6ec659fd5b3b163de9adc75bf29fc90460d212b489947b77b8ed89c01472/apache_tvm_ffi-0.1.8.post2-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef922ef3ed971a4e161a0385ef9f67af379d52b0d83d62c08b79f6707b6660b5", size = 2053058, upload-time = "2026-01-13T18:10:30.721Z" },
+ { url = "https://files.pythonhosted.org/packages/ec/a8/d01f81987db9bbfc4b242575d3fe79f72aeba3582ca449fec28d19938400/apache_tvm_ffi-0.1.8.post2-cp310-cp310-win_amd64.whl", hash = "sha256:146f98dcd21052eeed96ad07472bdffd8189fb2106edc6e3de91e28e3b000bf8", size = 1809231, upload-time = "2026-01-13T18:10:32.293Z" },
+ { url = "https://files.pythonhosted.org/packages/aa/86/7db24692281d80204d07d77346ad4cb87f6183f1364ed94311993a47ed1a/apache_tvm_ffi-0.1.8.post2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:40f5fba3e06617f16888a0fdaf7ab4049841ff6e741644be822400438b771fe7", size = 1840013, upload-time = "2026-01-13T18:10:33.724Z" },
+ { url = "https://files.pythonhosted.org/packages/cf/cc/fbaef883c6ba8e2c56ffcca997f2c076d1c14787799a62f39bd52c7126d5/apache_tvm_ffi-0.1.8.post2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9eb6d228fa22b6a5da140d761962f022a154746c91fe7608c49062deaf671f9f", size = 1995159, upload-time = "2026-01-13T18:10:35.727Z" },
+ { url = "https://files.pythonhosted.org/packages/49/08/f1e984e3573d0cbd6d53f3f73a12691fba153afc529fbd506d78e739b330/apache_tvm_ffi-0.1.8.post2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:581c0acf845859be0cc26ac79f3663a83393b662c97c7125ebb78f0228b69d96", size = 2068543, upload-time = "2026-01-13T18:10:39.12Z" },
+ { url = "https://files.pythonhosted.org/packages/35/1f/5336d430a133cf66ca9dac8ae9b6e25d8b99275a6687656421a1deee9f1b/apache_tvm_ffi-0.1.8.post2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:beadc7bb480ae02d02e2108543f6f4b4170d77e361ab3ccb43697d174ec185b0", size = 1939018, upload-time = "2026-01-13T18:10:40.621Z" },
+ { url = "https://files.pythonhosted.org/packages/5f/67/969c66a27a128cf738d0c068e0d4451d691d8197929c797cbe8e59c6cfc9/apache_tvm_ffi-0.1.8.post2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e593d191c7ca0726ebcd3b024a4bc8140694fdfce2e7b02493f38ad5c4c9ecf7", size = 2053068, upload-time = "2026-01-13T18:10:43.241Z" },
+ { url = "https://files.pythonhosted.org/packages/d4/f1/84881a799d227fdc4a61fbf0cb8d5ceb6a72ad788fa9070e5853ed9759b6/apache_tvm_ffi-0.1.8.post2-cp311-cp311-win_amd64.whl", hash = "sha256:1c685f19d0f26d9356c7c77a1cb652a3632ec9ee6cd21aa1d8cfb968743ec1fd", size = 1809557, upload-time = "2026-01-13T18:10:44.743Z" },
+ { url = "https://files.pythonhosted.org/packages/12/8b/a39d6c6eb1a87f6003e2717695cc6d44cc65ccd57dae5a0af944c0d25751/apache_tvm_ffi-0.1.8.post2-cp312-abi3-macosx_11_0_arm64.whl", hash = "sha256:c13ec7fc8f255767998b301ace0cd1e7d17ba76b48ffeb97ca9eb22a3314e250", size = 1811882, upload-time = "2026-01-13T18:10:46.317Z" },
+ { url = "https://files.pythonhosted.org/packages/8e/3a/7b1c9edcaeaebb945038144896cf17eb828a40b6ace0371823e133132664/apache_tvm_ffi-0.1.8.post2-cp312-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8c78b4caf17304a1f47881bccdb2f9ac24d98b3b7fbe761a6dd4fd0585934d96", size = 1967259, upload-time = "2026-01-13T18:10:47.851Z" },
+ { url = "https://files.pythonhosted.org/packages/6c/b6/463602f57dda2e1c69165c044c07061cd59404593f313a427a3ad9c02cf3/apache_tvm_ffi-0.1.8.post2-cp312-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4a48da3fa8f47130f3502134f01e97044388c5217e7b91be4b0acec4feab81a0", size = 2044821, upload-time = "2026-01-13T18:10:49.396Z" },
+ { url = "https://files.pythonhosted.org/packages/fe/e6/9cdc7f4814b2fbdfceba5dc640c3704d07d8db18e3d1aef5aa49bbf1ba7e/apache_tvm_ffi-0.1.8.post2-cp312-abi3-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:61cc98e489ebc03bc96d1a966dc863eb1c0a607383f6bf4a416ff0a96170ca85", size = 1910964, upload-time = "2026-01-13T18:10:51.345Z" },
+ { url = "https://files.pythonhosted.org/packages/7d/f5/a2e5487cdad575fe6cf34f8a23f8c49e08ce5808fa75dc19d98bcebc20ec/apache_tvm_ffi-0.1.8.post2-cp312-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:caa48509f0c7d9b896823b492a9ee42afac2548065c1ec7ef07f9a0dc30d2796", size = 2025814, upload-time = "2026-01-13T18:10:52.804Z" },
+ { url = "https://files.pythonhosted.org/packages/8f/0d/8922c142281187ae6b989579876d00d20b84ccd3878aad487b91d951d254/apache_tvm_ffi-0.1.8.post2-cp312-abi3-win_amd64.whl", hash = "sha256:985831722d1dd562d13e8e34102fd99f42f964c53fc7cf9d80fc4f7602f89196", size = 1790204, upload-time = "2026-01-13T18:10:54.558Z" },
+ { url = "https://files.pythonhosted.org/packages/2c/6e/2c21e754adf5c08fff154ee0a75b01568a4ed5da2d8f4a4a95d8451736e0/apache_tvm_ffi-0.1.8.post2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4a3f6cb1173cfe19a1b66fd8577a6f3ce644bdc22691961c07c64304a7c3f17a", size = 1842240, upload-time = "2026-01-13T18:10:56.652Z" },
+ { url = "https://files.pythonhosted.org/packages/f6/0a/342dd451d714b683143bd0d7dbd26279772dedf1d827a7efd357f05ff0aa/apache_tvm_ffi-0.1.8.post2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ac6c2d4e117ca63974bcd20fdf5715d01f3b4d0ed78921f493461050daf7c1a3", size = 1980660, upload-time = "2026-01-13T18:10:58.892Z" },
+ { url = "https://files.pythonhosted.org/packages/c6/63/59f00116530cf7513866467de9044dbdd1954a536009e56c44f167743b35/apache_tvm_ffi-0.1.8.post2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0bc5456f971097dcd973daba32cb6f321893873c53235159ab6426b0c7bef7e2", size = 2052810, upload-time = "2026-01-13T18:11:01.698Z" },
+ { url = "https://files.pythonhosted.org/packages/46/dc/e22c784937fdc907785a764d773ef57a925c443d8ec01ad8bff43dd8d8d6/apache_tvm_ffi-0.1.8.post2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7f2016b4b31e7f75d71c638bbd1ae43d6e239cf8e20b539fb9de6917b3fb25bc", size = 1923716, upload-time = "2026-01-13T18:11:03.225Z" },
+ { url = "https://files.pythonhosted.org/packages/ab/39/695f5642979d1d2d4cd3fca92e7b3b324ebba734b8aab9bdbacc26d4a05c/apache_tvm_ffi-0.1.8.post2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c0ca7be630d0888eae163a4298ddfb3f7bd837112c7e6ffcd7157e34e78215b", size = 2035440, upload-time = "2026-01-13T18:11:04.841Z" },
+ { url = "https://files.pythonhosted.org/packages/ed/e0/ed152425e51b7c8a4ce81d33683b43d87e770a76a65922dc7524a0106ae8/apache_tvm_ffi-0.1.8.post2-cp314-cp314t-win_amd64.whl", hash = "sha256:ecb0d9f7f410ba3b4d92547c2477f73f8406455448f4ea8c146515671fd20210", size = 1849938, upload-time = "2026-01-13T18:11:06.312Z" },
]
[[package]]
@@ -383,33 +387,59 @@ wheels = [
[[package]]
name = "av"
-version = "17.0.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/b2/eb/abca886df3a091bc406feb5ff71b4c4f426beaae6b71b9697264ce8c7211/av-17.0.0.tar.gz", hash = "sha256:c53685df73775a8763c375c7b2d62a6cb149d992a26a4b098204da42ade8c3df", size = 4410769, upload-time = "2026-03-14T14:38:45.868Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/95/4d/ea1ac272eeea83014daca1783679a9e9f894e1e68e5eb4f717dd8813da2a/av-17.0.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:4b21bcff4144acae658c0efb011fa8668c7a9638384f3ae7f5add33f35b907c6", size = 23407827, upload-time = "2026-03-14T14:37:47.337Z" },
- { url = "https://files.pythonhosted.org/packages/54/1a/e433766470c57c9c1c8558021de4d2466b3403ed629e48722d39d12baa6c/av-17.0.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:17cd518fc88dc449ce9dcfd0b40e9b3530266927375a743efc80d510adfb188b", size = 18829899, upload-time = "2026-03-14T14:37:50.493Z" },
- { url = "https://files.pythonhosted.org/packages/5f/25/95ad714f950c188495ffbfef235d06a332123d6f266026a534801ffc2171/av-17.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:9a8b7b63a92d8dc7cbe5000546e4684176124ddd49fdd9c12570e3aa6dadf11a", size = 35348062, upload-time = "2026-03-14T14:37:52.964Z" },
- { url = "https://files.pythonhosted.org/packages/7a/db/7f3f9e92f2ac8dba639ab01d69a33b723aa16b5e3e612dbfe667fbc02dcd/av-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:8706ce9b5d8d087d093b46a9781e7532c4a9e13874bca1da468be78efc56cecc", size = 37684503, upload-time = "2026-03-14T14:37:55.628Z" },
- { url = "https://files.pythonhosted.org/packages/c1/53/3b356b14ba72354688c8d9777cf67b707769b6e14b63aaeb0cddeeac8d32/av-17.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3a074835ce807434451086993fedfb3b223dacedb2119ab9d7a72480f2d77f32", size = 36547601, upload-time = "2026-03-14T14:37:58.465Z" },
- { url = "https://files.pythonhosted.org/packages/cd/8d/f489cd6f9fe9c8b38dca00ecb39dc38836761767a4ec07dd95e62e124ac3/av-17.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f8ef8e8f1a0cbb2e0ad49266015e2277801a916e2186ac9451b493ff6dfdec27", size = 38815129, upload-time = "2026-03-14T14:38:01.277Z" },
- { url = "https://files.pythonhosted.org/packages/fb/bd/e42536234e37caffd1a054de1a0e6abca226c5686e9672726a8d95511422/av-17.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:a795e153ff31a6430e974b4e6ad0d0fab695b78e3f17812293a0a34cd03ee6a9", size = 28984602, upload-time = "2026-03-14T14:38:03.632Z" },
- { url = "https://files.pythonhosted.org/packages/b1/fb/55e3b5b5d1fc61466292f26fbcbabafa2642f378dc48875f8f554591e1a4/av-17.0.0-cp311-abi3-macosx_11_0_x86_64.whl", hash = "sha256:ed4013fac77c309a4a68141dcf6148f1821bb1073a36d4289379762a6372f711", size = 23238424, upload-time = "2026-03-14T14:38:05.856Z" },
- { url = "https://files.pythonhosted.org/packages/52/03/9ace1acc08bc9ae38c14bf3a4b1360e995e4d999d1d33c2cbd7c9e77582a/av-17.0.0-cp311-abi3-macosx_14_0_arm64.whl", hash = "sha256:e44b6c83e9f3be9f79ee87d0b77a27cea9a9cd67bd630362c86b7e56a748dfbb", size = 18709043, upload-time = "2026-03-14T14:38:08.288Z" },
- { url = "https://files.pythonhosted.org/packages/00/c0/637721f3cd5bb8bd16105a1a08efd781fc12f449931bdb3a4d0cfd63fa55/av-17.0.0-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:b440da6ac47da0629d509316f24bcd858f33158dbdd0f1b7293d71e99beb26de", size = 34018780, upload-time = "2026-03-14T14:38:10.45Z" },
- { url = "https://files.pythonhosted.org/packages/d2/59/d19bc3257dd985d55337d7f0414c019414b97e16cd3690ebf9941a847543/av-17.0.0-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1060cba85f97f4a337311169d92c0b5e143452cfa5ca0e65fa499d7955e8592e", size = 36358757, upload-time = "2026-03-14T14:38:13.092Z" },
- { url = "https://files.pythonhosted.org/packages/52/6c/a1f4f2677bae6f2ade7a8a18e90ebdcf70690c9b1c4e40e118aa30fa313f/av-17.0.0-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:deda202e6021cfc7ba3e816897760ec5431309d59a4da1f75df3c0e9413d71e7", size = 35195281, upload-time = "2026-03-14T14:38:15.789Z" },
- { url = "https://files.pythonhosted.org/packages/90/ea/52b0fc6f69432c7bf3f5fbe6f707113650aa40a1a05b9096ffc2bba4f77d/av-17.0.0-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ffaf266a1a9c2148072de0a4b5ae98061465178d2cfaa69ee089761149342974", size = 37444817, upload-time = "2026-03-14T14:38:18.563Z" },
- { url = "https://files.pythonhosted.org/packages/34/ad/d2172966282cb8f146c13b6be7416efefde74186460c5e1708ddfc13dba6/av-17.0.0-cp311-abi3-win_amd64.whl", hash = "sha256:45a35a40b2875bf2f98de7c952d74d960f92f319734e6d28e03b4c62a49e6f49", size = 28888553, upload-time = "2026-03-14T14:38:21.223Z" },
- { url = "https://files.pythonhosted.org/packages/b0/bb/c5a4c4172c514d631fb506e6366b503576b8c7f29809cf42aca73e28ff01/av-17.0.0-cp311-abi3-win_arm64.whl", hash = "sha256:3d32e9b5c5bbcb872a0b6917b352a1db8a42142237826c9b49a36d5dbd9e9c26", size = 21916910, upload-time = "2026-03-14T14:38:23.706Z" },
- { url = "https://files.pythonhosted.org/packages/7f/8e/c40ac08e63f79387c59f6ecc38f47d4c942b549130eee579ec1a91f6a291/av-17.0.0-cp314-cp314t-macosx_11_0_x86_64.whl", hash = "sha256:d13250fb4b4522e9a6bec32da082556d5f257110ea223758151375748d9bbe25", size = 23483029, upload-time = "2026-03-14T14:38:25.758Z" },
- { url = "https://files.pythonhosted.org/packages/a9/fb/b4419494bfc249163ec393c613966d66db7e95c76da3345711cd115a79df/av-17.0.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:dbb56aa3b7ae72451d1bf6e9d37c7d83d39b97af712f73583ff419fbf08fc237", size = 18920446, upload-time = "2026-03-14T14:38:27.905Z" },
- { url = "https://files.pythonhosted.org/packages/30/62/c2306d91602ddad2c56106f21dcb334fd51d5ea2e952f7fa025bb8aa39fc/av-17.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:a213ac9e83b7ab12c2e9f277a09cac8e9d85cf0883efdab7a87a60e2e4e48879", size = 37477266, upload-time = "2026-03-14T14:38:30.404Z" },
- { url = "https://files.pythonhosted.org/packages/28/cd/c8510a9607886785c0b3ca019d503e888c3757529be42a7287fe2bfa92d5/av-17.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:e15c88bb0921f9435bcc5a27a0863dba571a80ad5e1389c4fcf2073833bb4a74", size = 39572988, upload-time = "2026-03-14T14:38:32.984Z" },
- { url = "https://files.pythonhosted.org/packages/7d/2d/207d9361e25b5abec9be335bbab4df6b6b838e2214be4b374f4cfb285427/av-17.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:096cfd1e9fc896506726c7c42aaf9b370e78c2f257cde4d6ddb6c889bfcc49ec", size = 38399591, upload-time = "2026-03-14T14:38:35.465Z" },
- { url = "https://files.pythonhosted.org/packages/73/ca/307740c6aa2980966bf11383ffcb04bacc5b13f3d268ab4cfb274ad6f793/av-17.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3649ab3d2c7f58049ded1a36e100c0d8fd529cf258f41dd88678ba824034d8c9", size = 40590681, upload-time = "2026-03-14T14:38:38.269Z" },
- { url = "https://files.pythonhosted.org/packages/35/f2/6fdb26d0651adf409864cb2a0d60da107e467d3d1aabc94b234ead54324a/av-17.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:e5002271ab2135b551d980c2db8f3299d452e3b9d3633f24f6bb57fffe91cd10", size = 29216337, upload-time = "2026-03-14T14:38:40.83Z" },
- { url = "https://files.pythonhosted.org/packages/41/0a/0896b829a39b5669a2d811e1a79598de661693685cd62b31f11d0c18e65b/av-17.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:dba98603fc4665b4f750de86fbaf6c0cfaece970671a9b529e0e3d1711e8367e", size = 22071058, upload-time = "2026-03-14T14:38:43.663Z" },
+version = "16.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/78/cd/3a83ffbc3cc25b39721d174487fb0d51a76582f4a1703f98e46170ce83d4/av-16.1.0.tar.gz", hash = "sha256:a094b4fd87a3721dacf02794d3d2c82b8d712c85b9534437e82a8a978c175ffd", size = 4285203, upload-time = "2026-01-11T07:31:33.772Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/97/51/2217a9249409d2e88e16e3f16f7c0def9fd3e7ffc4238b2ec211f9935bdb/av-16.1.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:2395748b0c34fe3a150a1721e4f3d4487b939520991b13e7b36f8926b3b12295", size = 26942590, upload-time = "2026-01-09T20:17:58.588Z" },
+ { url = "https://files.pythonhosted.org/packages/bf/cd/a7070f4febc76a327c38808e01e2ff6b94531fe0b321af54ea3915165338/av-16.1.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:72d7ac832710a158eeb7a93242370aa024a7646516291c562ee7f14a7ea881fd", size = 21507910, upload-time = "2026-01-09T20:18:02.309Z" },
+ { url = "https://files.pythonhosted.org/packages/ae/30/ec812418cd9b297f0238fe20eb0747d8a8b68d82c5f73c56fe519a274143/av-16.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6cbac833092e66b6b0ac4d81ab077970b8ca874951e9c3974d41d922aaa653ed", size = 38738309, upload-time = "2026-01-09T20:18:04.701Z" },
+ { url = "https://files.pythonhosted.org/packages/3a/b8/6c5795bf1f05f45c5261f8bce6154e0e5e86b158a6676650ddd77c28805e/av-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:eb990672d97c18f99c02f31c8d5750236f770ffe354b5a52c5f4d16c5e65f619", size = 40293006, upload-time = "2026-01-09T20:18:07.238Z" },
+ { url = "https://files.pythonhosted.org/packages/a7/44/5e183bcb9333fc3372ee6e683be8b0c9b515a506894b2d32ff465430c074/av-16.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:05ad70933ac3b8ef896a820ea64b33b6cca91a5fac5259cb9ba7fa010435be15", size = 40123516, upload-time = "2026-01-09T20:18:09.955Z" },
+ { url = "https://files.pythonhosted.org/packages/12/1d/b5346d582a3c3d958b4d26a2cc63ce607233582d956121eb20d2bbe55c2e/av-16.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d831a1062a3c47520bf99de6ec682bd1d64a40dfa958e5457bb613c5270e7ce3", size = 41463289, upload-time = "2026-01-09T20:18:12.459Z" },
+ { url = "https://files.pythonhosted.org/packages/fa/31/acc946c0545f72b8d0d74584cb2a0ade9b7dfe2190af3ef9aa52a2e3c0b1/av-16.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:358ab910fef3c5a806c55176f2b27e5663b33c4d0a692dafeb049c6ed71f8aff", size = 31754959, upload-time = "2026-01-09T20:18:14.718Z" },
+ { url = "https://files.pythonhosted.org/packages/48/d0/b71b65d1b36520dcb8291a2307d98b7fc12329a45614a303ff92ada4d723/av-16.1.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:e88ad64ee9d2b9c4c5d891f16c22ae78e725188b8926eb88187538d9dd0b232f", size = 26927747, upload-time = "2026-01-09T20:18:16.976Z" },
+ { url = "https://files.pythonhosted.org/packages/2f/79/720a5a6ccdee06eafa211b945b0a450e3a0b8fc3d12922f0f3c454d870d2/av-16.1.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:cb296073fa6935724de72593800ba86ae49ed48af03960a4aee34f8a611f442b", size = 21492232, upload-time = "2026-01-09T20:18:19.266Z" },
+ { url = "https://files.pythonhosted.org/packages/8e/4f/a1ba8d922f2f6d1a3d52419463ef26dd6c4d43ee364164a71b424b5ae204/av-16.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:720edd4d25aa73723c1532bb0597806d7b9af5ee34fc02358782c358cfe2f879", size = 39291737, upload-time = "2026-01-09T20:18:21.513Z" },
+ { url = "https://files.pythonhosted.org/packages/1a/31/fc62b9fe8738d2693e18d99f040b219e26e8df894c10d065f27c6b4f07e3/av-16.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:c7f2bc703d0df260a1fdf4de4253c7f5500ca9fc57772ea241b0cb241bcf972e", size = 40846822, upload-time = "2026-01-09T20:18:24.275Z" },
+ { url = "https://files.pythonhosted.org/packages/53/10/ab446583dbce730000e8e6beec6ec3c2753e628c7f78f334a35cad0317f4/av-16.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d69c393809babada7d54964d56099e4b30a3e1f8b5736ca5e27bd7be0e0f3c83", size = 40675604, upload-time = "2026-01-09T20:18:26.866Z" },
+ { url = "https://files.pythonhosted.org/packages/31/d7/1003be685277005f6d63fd9e64904ee222fe1f7a0ea70af313468bb597db/av-16.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:441892be28582356d53f282873c5a951592daaf71642c7f20165e3ddcb0b4c63", size = 42015955, upload-time = "2026-01-09T20:18:29.461Z" },
+ { url = "https://files.pythonhosted.org/packages/2f/4a/fa2a38ee9306bf4579f556f94ecbc757520652eb91294d2a99c7cf7623b9/av-16.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:273a3e32de64819e4a1cd96341824299fe06f70c46f2288b5dc4173944f0fd62", size = 31750339, upload-time = "2026-01-09T20:18:32.249Z" },
+ { url = "https://files.pythonhosted.org/packages/9c/84/2535f55edcd426cebec02eb37b811b1b0c163f26b8d3f53b059e2ec32665/av-16.1.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:640f57b93f927fba8689f6966c956737ee95388a91bd0b8c8b5e0481f73513d6", size = 26945785, upload-time = "2026-01-09T20:18:34.486Z" },
+ { url = "https://files.pythonhosted.org/packages/b6/17/ffb940c9e490bf42e86db4db1ff426ee1559cd355a69609ec1efe4d3a9eb/av-16.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:ae3fb658eec00852ebd7412fdc141f17f3ddce8afee2d2e1cf366263ad2a3b35", size = 21481147, upload-time = "2026-01-09T20:18:36.716Z" },
+ { url = "https://files.pythonhosted.org/packages/15/c1/e0d58003d2d83c3921887d5c8c9b8f5f7de9b58dc2194356a2656a45cfdc/av-16.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:27ee558d9c02a142eebcbe55578a6d817fedfde42ff5676275504e16d07a7f86", size = 39517197, upload-time = "2026-01-11T09:57:31.937Z" },
+ { url = "https://files.pythonhosted.org/packages/32/77/787797b43475d1b90626af76f80bfb0c12cfec5e11eafcfc4151b8c80218/av-16.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:7ae547f6d5fa31763f73900d43901e8c5fa6367bb9a9840978d57b5a7ae14ed2", size = 41174337, upload-time = "2026-01-11T09:57:35.792Z" },
+ { url = "https://files.pythonhosted.org/packages/8e/ac/d90df7f1e3b97fc5554cf45076df5045f1e0a6adf13899e10121229b826c/av-16.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8cf065f9d438e1921dc31fc7aa045790b58aee71736897866420d80b5450f62a", size = 40817720, upload-time = "2026-01-11T09:57:39.039Z" },
+ { url = "https://files.pythonhosted.org/packages/80/6f/13c3a35f9dbcebafd03fe0c4cbd075d71ac8968ec849a3cfce406c35a9d2/av-16.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a345877a9d3cc0f08e2bc4ec163ee83176864b92587afb9d08dff50f37a9a829", size = 42267396, upload-time = "2026-01-11T09:57:42.115Z" },
+ { url = "https://files.pythonhosted.org/packages/c8/b9/275df9607f7fb44317ccb1d4be74827185c0d410f52b6e2cd770fe209118/av-16.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:f49243b1d27c91cd8c66fdba90a674e344eb8eb917264f36117bf2b6879118fd", size = 31752045, upload-time = "2026-01-11T09:57:45.106Z" },
+ { url = "https://files.pythonhosted.org/packages/75/2a/63797a4dde34283dd8054219fcb29294ba1c25d68ba8c8c8a6ae53c62c45/av-16.1.0-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:ce2a1b3d8bf619f6c47a9f28cfa7518ff75ddd516c234a4ee351037b05e6a587", size = 26916715, upload-time = "2026-01-11T09:57:47.682Z" },
+ { url = "https://files.pythonhosted.org/packages/d2/c4/0b49cf730d0ae8cda925402f18ae814aef351f5772d14da72dd87ff66448/av-16.1.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:408dbe6a2573ca58a855eb8cd854112b33ea598651902c36709f5f84c991ed8e", size = 21452167, upload-time = "2026-01-11T09:57:50.606Z" },
+ { url = "https://files.pythonhosted.org/packages/51/23/408806503e8d5d840975aad5699b153aaa21eb6de41ade75248a79b7a37f/av-16.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:57f657f86652a160a8a01887aaab82282f9e629abf94c780bbdbb01595d6f0f7", size = 39215659, upload-time = "2026-01-11T09:57:53.757Z" },
+ { url = "https://files.pythonhosted.org/packages/c4/19/a8528d5bba592b3903f44c28dab9cc653c95fcf7393f382d2751a1d1523e/av-16.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:adbad2b355c2ee4552cac59762809d791bda90586d134a33c6f13727fb86cb3a", size = 40874970, upload-time = "2026-01-11T09:57:56.802Z" },
+ { url = "https://files.pythonhosted.org/packages/e8/24/2dbcdf0e929ad56b7df078e514e7bd4ca0d45cba798aff3c8caac097d2f7/av-16.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f42e1a68ec2aebd21f7eb6895be69efa6aa27eec1670536876399725bbda4b99", size = 40530345, upload-time = "2026-01-11T09:58:00.421Z" },
+ { url = "https://files.pythonhosted.org/packages/54/27/ae91b41207f34e99602d1c72ab6ffd9c51d7c67e3fbcd4e3a6c0e54f882c/av-16.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:58fe47aeaef0f100c40ec8a5de9abbd37f118d3ca03829a1009cf288e9aef67c", size = 41972163, upload-time = "2026-01-11T09:58:03.756Z" },
+ { url = "https://files.pythonhosted.org/packages/fc/7a/22158fb923b2a9a00dfab0e96ef2e8a1763a94dd89e666a5858412383d46/av-16.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:565093ebc93b2f4b76782589564869dadfa83af5b852edebedd8fee746457d06", size = 31729230, upload-time = "2026-01-11T09:58:07.254Z" },
+ { url = "https://files.pythonhosted.org/packages/7f/f1/878f8687d801d6c4565d57ebec08449c46f75126ebca8e0fed6986599627/av-16.1.0-cp313-cp313t-macosx_11_0_x86_64.whl", hash = "sha256:574081a24edb98343fd9f473e21ae155bf61443d4ec9d7708987fa597d6b04b2", size = 27008769, upload-time = "2026-01-11T09:58:10.266Z" },
+ { url = "https://files.pythonhosted.org/packages/30/f1/bd4ce8c8b5cbf1d43e27048e436cbc9de628d48ede088a1d0a993768eb86/av-16.1.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:9ab00ea29c25ebf2ea1d1e928d7babb3532d562481c5d96c0829212b70756ad0", size = 21590588, upload-time = "2026-01-11T09:58:12.629Z" },
+ { url = "https://files.pythonhosted.org/packages/1d/dd/c81f6f9209201ff0b5d5bed6da6c6e641eef52d8fbc930d738c3f4f6f75d/av-16.1.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:a84a91188c1071f238a9523fd42dbe567fb2e2607b22b779851b2ce0eac1b560", size = 40638029, upload-time = "2026-01-11T09:58:15.399Z" },
+ { url = "https://files.pythonhosted.org/packages/15/4d/07edff82b78d0459a6e807e01cd280d3180ce832efc1543de80d77676722/av-16.1.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:c2cd0de4dd022a7225ff224fde8e7971496d700be41c50adaaa26c07bb50bf97", size = 41970776, upload-time = "2026-01-11T09:58:19.075Z" },
+ { url = "https://files.pythonhosted.org/packages/da/9d/1f48b354b82fa135d388477cd1b11b81bdd4384bd6a42a60808e2ec2d66b/av-16.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:0816143530624a5a93bc5494f8c6eeaf77549b9366709c2ac8566c1e9bff6df5", size = 41764751, upload-time = "2026-01-11T09:58:22.788Z" },
+ { url = "https://files.pythonhosted.org/packages/2f/c7/a509801e98db35ec552dd79da7bdbcff7104044bfeb4c7d196c1ce121593/av-16.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e3a28053af29644696d0c007e897d19b1197585834660a54773e12a40b16974c", size = 43034355, upload-time = "2026-01-11T09:58:26.125Z" },
+ { url = "https://files.pythonhosted.org/packages/36/8b/e5f530d9e8f640da5f5c5f681a424c65f9dd171c871cd255d8a861785a6e/av-16.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2e3e67144a202b95ed299d165232533989390a9ea3119d37eccec697dc6dbb0c", size = 31947047, upload-time = "2026-01-11T09:58:31.867Z" },
+ { url = "https://files.pythonhosted.org/packages/df/18/8812221108c27d19f7e5f486a82c827923061edf55f906824ee0fcaadf50/av-16.1.0-cp314-cp314-macosx_11_0_x86_64.whl", hash = "sha256:39a634d8e5a87e78ea80772774bfd20c0721f0d633837ff185f36c9d14ffede4", size = 26916179, upload-time = "2026-01-11T09:58:36.506Z" },
+ { url = "https://files.pythonhosted.org/packages/38/ef/49d128a9ddce42a2766fe2b6595bd9c49e067ad8937a560f7838a541464e/av-16.1.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:0ba32fb9e9300948a7fa9f8a3fc686e6f7f77599a665c71eb2118fdfd2c743f9", size = 21460168, upload-time = "2026-01-11T09:58:39.231Z" },
+ { url = "https://files.pythonhosted.org/packages/e6/a9/b310d390844656fa74eeb8c2750e98030877c75b97551a23a77d3f982741/av-16.1.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:ca04d17815182d34ce3edc53cbda78a4f36e956c0fd73e3bab249872a831c4d7", size = 39210194, upload-time = "2026-01-11T09:58:42.138Z" },
+ { url = "https://files.pythonhosted.org/packages/0c/7b/e65aae179929d0f173af6e474ad1489b5b5ad4c968a62c42758d619e54cf/av-16.1.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:ee0e8de2e124a9ef53c955fe2add6ee7c56cc8fd83318265549e44057db77142", size = 40811675, upload-time = "2026-01-11T09:58:45.871Z" },
+ { url = "https://files.pythonhosted.org/packages/54/3f/5d7edefd26b6a5187d6fac0f5065ee286109934f3dea607ef05e53f05b31/av-16.1.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:22bf77a2f658827043a1e184b479c3bf25c4c43ab32353677df2d119f080e28f", size = 40543942, upload-time = "2026-01-11T09:58:49.759Z" },
+ { url = "https://files.pythonhosted.org/packages/1b/24/f8b17897b67be0900a211142f5646a99d896168f54d57c81f3e018853796/av-16.1.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2dd419d262e6a71cab206d80bbf28e0a10d0f227b671cdf5e854c028faa2d043", size = 41924336, upload-time = "2026-01-11T09:58:53.344Z" },
+ { url = "https://files.pythonhosted.org/packages/1c/cf/d32bc6bbbcf60b65f6510c54690ed3ae1c4ca5d9fafbce835b6056858686/av-16.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:53585986fd431cd436f290fba662cfb44d9494fbc2949a183de00acc5b33fa88", size = 31735077, upload-time = "2026-01-11T09:58:56.684Z" },
+ { url = "https://files.pythonhosted.org/packages/53/f4/9b63dc70af8636399bd933e9df4f3025a0294609510239782c1b746fc796/av-16.1.0-cp314-cp314t-macosx_11_0_x86_64.whl", hash = "sha256:76f5ed8495cf41e1209a5775d3699dc63fdc1740b94a095e2485f13586593205", size = 27014423, upload-time = "2026-01-11T09:58:59.703Z" },
+ { url = "https://files.pythonhosted.org/packages/d1/da/787a07a0d6ed35a0888d7e5cfb8c2ffa202f38b7ad2c657299fac08eb046/av-16.1.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:8d55397190f12a1a3ae7538be58c356cceb2bf50df1b33523817587748ce89e5", size = 21595536, upload-time = "2026-01-11T09:59:02.508Z" },
+ { url = "https://files.pythonhosted.org/packages/d8/f4/9a7d8651a611be6e7e3ab7b30bb43779899c8cac5f7293b9fb634c44a3f3/av-16.1.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:9d51d9037437218261b4bbf9df78a95e216f83d7774fbfe8d289230b5b2e28e2", size = 40642490, upload-time = "2026-01-11T09:59:05.842Z" },
+ { url = "https://files.pythonhosted.org/packages/6b/e4/eb79bc538a94b4ff93cd4237d00939cba797579f3272490dd0144c165a21/av-16.1.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:0ce07a89c15644407f49d942111ca046e323bbab0a9078ff43ee57c9b4a50dad", size = 41976905, upload-time = "2026-01-11T09:59:09.169Z" },
+ { url = "https://files.pythonhosted.org/packages/5e/f5/f6db0dd86b70167a4d55ee0d9d9640983c570d25504f2bde42599f38241e/av-16.1.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:cac0c074892ea97113b53556ff41c99562db7b9f09f098adac1f08318c2acad5", size = 41770481, upload-time = "2026-01-11T09:59:12.74Z" },
+ { url = "https://files.pythonhosted.org/packages/9e/8b/33651d658e45e16ab7671ea5fcf3d20980ea7983234f4d8d0c63c65581a5/av-16.1.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7dec3dcbc35a187ce450f65a2e0dda820d5a9e6553eea8344a1459af11c98649", size = 43036824, upload-time = "2026-01-11T09:59:16.507Z" },
+ { url = "https://files.pythonhosted.org/packages/83/41/7f13361db54d7e02f11552575c0384dadaf0918138f4eaa82ea03a9f9580/av-16.1.0-cp314-cp314t-win_amd64.whl", hash = "sha256:6f90dc082ff2068ddbe77618400b44d698d25d9c4edac57459e250c16b33d700", size = 31948164, upload-time = "2026-01-11T09:59:19.501Z" },
]
[[package]]
@@ -601,15 +631,14 @@ wheels = [
[[package]]
name = "bitstring"
-version = "4.4.0"
+version = "4.3.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "bitarray" },
- { name = "tibs" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/36/d3/de6fe4e7065df8c2f1ac1766f5fdccbe75bc18af2cf2dbeecd34d68e1518/bitstring-4.4.0.tar.gz", hash = "sha256:e682ac522bb63e041d16cbc9d0ca86a4f00194db16d0847c7efe066f836b2e37", size = 255209, upload-time = "2026-03-10T20:29:14.824Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/15/a8/a80c890db75d5bdd5314b5de02c4144c7de94fd0cefcae51acaeb14c6a3f/bitstring-4.3.1.tar.gz", hash = "sha256:a08bc09d3857216d4c0f412a1611056f1cc2b64fd254fb1e8a0afba7cfa1a95a", size = 251426, upload-time = "2025-03-22T09:39:06.978Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/bf/02/1a870bab76f2896d827aa4963be95e56675ffa1453e53525d13c43036edf/bitstring-4.4.0-py3-none-any.whl", hash = "sha256:feac49524fcf3ef27e6081e86f02b10d2adf6c3773bf22fbe0e7eea9534bc737", size = 76846, upload-time = "2026-03-10T20:29:12.832Z" },
+ { url = "https://files.pythonhosted.org/packages/75/2d/174566b533755ddf8efb32a5503af61c756a983de379f8ad3aed6a982d38/bitstring-4.3.1-py3-none-any.whl", hash = "sha256:69d1587f0ac18dc7d93fc7e80d5f447161a33e57027e726dc18a0a8bacf1711a", size = 71930, upload-time = "2025-03-22T09:39:05.163Z" },
]
[[package]]
@@ -653,16 +682,16 @@ wheels = [
[[package]]
name = "botocore"
-version = "1.42.61"
+version = "1.41.5"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "jmespath" },
{ name = "python-dateutil" },
{ name = "urllib3" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/d1/6a/27836dde004717c496f69f4fe28fa2f3f3762d04859a9292681944a45a36/botocore-1.42.61.tar.gz", hash = "sha256:702d6011ace2b5b652a0dbb45053d4d9f79da2c5b184463042434e1754bdd601", size = 14954743, upload-time = "2026-03-04T20:30:41.956Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/90/22/7fe08c726a2e3b11a0aef8bf177e83891c9cb2dc1809d35c9ed91a9e60e6/botocore-1.41.5.tar.gz", hash = "sha256:0367622b811597d183bfcaab4a350f0d3ede712031ce792ef183cabdee80d3bf", size = 14668152, upload-time = "2025-11-26T20:27:38.026Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/88/46/98a01139f318b7a2f0ad1d1e3be2a028d13aeb7e05aaa340a27cdc47fdf0/botocore-1.42.61-py3-none-any.whl", hash = "sha256:476059beb3f462042742950cf195d26bc313461a77189c16e37e205b0a924b26", size = 14627717, upload-time = "2026-03-04T20:30:37.503Z" },
+ { url = "https://files.pythonhosted.org/packages/4e/4e/21cd0b8f365449f1576f93de1ec8718ed18a7a3bc086dfbdeb79437bba7a/botocore-1.41.5-py3-none-any.whl", hash = "sha256:3fef7fcda30c82c27202d232cfdbd6782cb27f20f8e7e21b20606483e66ee73a", size = 14337008, upload-time = "2025-11-26T20:27:35.208Z" },
]
[[package]]
@@ -694,22 +723,22 @@ wheels = [
[[package]]
name = "causal-conv1d"
-version = "1.6.1"
+version = "1.6.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "ninja" },
{ name = "packaging" },
{ name = "torch", marker = "sys_platform == 'never'" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/63/15/ec51d77a2df03ee93410f8ee97fceeb7181da213813c51243e9dd6d7e144/causal_conv1d-1.6.1.tar.gz", hash = "sha256:e4a697ec2db3906f012e675125569f8b510b4559bc53e3095143d91369e1221b", size = 29426, upload-time = "2026-03-10T08:56:35.305Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/db/df/63a384c49743b9fc8fec4c05dbd0b515e1c1c2b07e4559acc4fc37c69223/causal_conv1d-1.6.0.tar.gz", hash = "sha256:4eae3220d08e1e88238f3a0a88783147cbdf47f612cc610add75127c7a37ca3e", size = 29356, upload-time = "2026-01-12T17:33:32.794Z" }
[[package]]
name = "certifi"
-version = "2026.2.25"
+version = "2026.1.4"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/af/2d/7bf41579a8986e348fa033a31cdd0e4121114f6bce2457e8876010b092dd/certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7", size = 155029, upload-time = "2026-02-25T02:54:17.342Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa", size = 153684, upload-time = "2026-02-25T02:54:15.766Z" },
+ { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" },
]
[[package]]
@@ -796,107 +825,91 @@ wheels = [
[[package]]
name = "charset-normalizer"
-version = "3.4.6"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/7b/60/e3bec1881450851b087e301bedc3daa9377a4d45f1c26aa90b0b235e38aa/charset_normalizer-3.4.6.tar.gz", hash = "sha256:1ae6b62897110aa7c79ea2f5dd38d1abca6db663687c0b1ad9aed6f6bae3d9d6", size = 143363, upload-time = "2026-03-15T18:53:25.478Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/e6/8c/2c56124c6dc53a774d435f985b5973bc592f42d437be58c0c92d65ae7296/charset_normalizer-3.4.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2e1d8ca8611099001949d1cdfaefc510cf0f212484fe7c565f735b68c78c3c95", size = 298751, upload-time = "2026-03-15T18:50:00.003Z" },
- { url = "https://files.pythonhosted.org/packages/86/2a/2a7db6b314b966a3bcad8c731c0719c60b931b931de7ae9f34b2839289ee/charset_normalizer-3.4.6-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e25369dc110d58ddf29b949377a93e0716d72a24f62bad72b2b39f155949c1fd", size = 200027, upload-time = "2026-03-15T18:50:01.702Z" },
- { url = "https://files.pythonhosted.org/packages/68/f2/0fe775c74ae25e2a3b07b01538fc162737b3e3f795bada3bc26f4d4d495c/charset_normalizer-3.4.6-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:259695e2ccc253feb2a016303543d691825e920917e31f894ca1a687982b1de4", size = 220741, upload-time = "2026-03-15T18:50:03.194Z" },
- { url = "https://files.pythonhosted.org/packages/10/98/8085596e41f00b27dd6aa1e68413d1ddda7e605f34dd546833c61fddd709/charset_normalizer-3.4.6-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:dda86aba335c902b6149a02a55b38e96287157e609200811837678214ba2b1db", size = 215802, upload-time = "2026-03-15T18:50:05.859Z" },
- { url = "https://files.pythonhosted.org/packages/fd/ce/865e4e09b041bad659d682bbd98b47fb490b8e124f9398c9448065f64fee/charset_normalizer-3.4.6-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:51fb3c322c81d20567019778cb5a4a6f2dc1c200b886bc0d636238e364848c89", size = 207908, upload-time = "2026-03-15T18:50:07.676Z" },
- { url = "https://files.pythonhosted.org/packages/a8/54/8c757f1f7349262898c2f169e0d562b39dcb977503f18fdf0814e923db78/charset_normalizer-3.4.6-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:4482481cb0572180b6fd976a4d5c72a30263e98564da68b86ec91f0fe35e8565", size = 194357, upload-time = "2026-03-15T18:50:09.327Z" },
- { url = "https://files.pythonhosted.org/packages/6f/29/e88f2fac9218907fc7a70722b393d1bbe8334c61fe9c46640dba349b6e66/charset_normalizer-3.4.6-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:39f5068d35621da2881271e5c3205125cc456f54e9030d3f723288c873a71bf9", size = 205610, upload-time = "2026-03-15T18:50:10.732Z" },
- { url = "https://files.pythonhosted.org/packages/4c/c5/21d7bb0cb415287178450171d130bed9d664211fdd59731ed2c34267b07d/charset_normalizer-3.4.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8bea55c4eef25b0b19a0337dc4e3f9a15b00d569c77211fa8cde38684f234fb7", size = 203512, upload-time = "2026-03-15T18:50:12.535Z" },
- { url = "https://files.pythonhosted.org/packages/a4/be/ce52f3c7fdb35cc987ad38a53ebcef52eec498f4fb6c66ecfe62cfe57ba2/charset_normalizer-3.4.6-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:f0cdaecd4c953bfae0b6bb64910aaaca5a424ad9c72d85cb88417bb9814f7550", size = 195398, upload-time = "2026-03-15T18:50:14.236Z" },
- { url = "https://files.pythonhosted.org/packages/81/a0/3ab5dd39d4859a3555e5dadfc8a9fa7f8352f8c183d1a65c90264517da0e/charset_normalizer-3.4.6-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:150b8ce8e830eb7ccb029ec9ca36022f756986aaaa7956aad6d9ec90089338c0", size = 221772, upload-time = "2026-03-15T18:50:15.581Z" },
- { url = "https://files.pythonhosted.org/packages/04/6e/6a4e41a97ba6b2fa87f849c41e4d229449a586be85053c4d90135fe82d26/charset_normalizer-3.4.6-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:e68c14b04827dd76dcbd1aeea9e604e3e4b78322d8faf2f8132c7138efa340a8", size = 205759, upload-time = "2026-03-15T18:50:17.047Z" },
- { url = "https://files.pythonhosted.org/packages/db/3b/34a712a5ee64a6957bf355b01dc17b12de457638d436fdb05d01e463cd1c/charset_normalizer-3.4.6-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:3778fd7d7cd04ae8f54651f4a7a0bd6e39a0cf20f801720a4c21d80e9b7ad6b0", size = 216938, upload-time = "2026-03-15T18:50:18.44Z" },
- { url = "https://files.pythonhosted.org/packages/cb/05/5bd1e12da9ab18790af05c61aafd01a60f489778179b621ac2a305243c62/charset_normalizer-3.4.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:dad6e0f2e481fffdcf776d10ebee25e0ef89f16d691f1e5dee4b586375fdc64b", size = 210138, upload-time = "2026-03-15T18:50:19.852Z" },
- { url = "https://files.pythonhosted.org/packages/bd/8e/3cb9e2d998ff6b21c0a1860343cb7b83eba9cdb66b91410e18fc4969d6ab/charset_normalizer-3.4.6-cp310-cp310-win32.whl", hash = "sha256:74a2e659c7ecbc73562e2a15e05039f1e22c75b7c7618b4b574a3ea9118d1557", size = 144137, upload-time = "2026-03-15T18:50:21.505Z" },
- { url = "https://files.pythonhosted.org/packages/d8/8f/78f5489ffadb0db3eb7aff53d31c24531d33eb545f0c6f6567c25f49a5ff/charset_normalizer-3.4.6-cp310-cp310-win_amd64.whl", hash = "sha256:aa9cccf4a44b9b62d8ba8b4dd06c649ba683e4bf04eea606d2e94cfc2d6ff4d6", size = 154244, upload-time = "2026-03-15T18:50:22.81Z" },
- { url = "https://files.pythonhosted.org/packages/e4/74/e472659dffb0cadb2f411282d2d76c60da1fc94076d7fffed4ae8a93ec01/charset_normalizer-3.4.6-cp310-cp310-win_arm64.whl", hash = "sha256:e985a16ff513596f217cee86c21371b8cd011c0f6f056d0920aa2d926c544058", size = 143312, upload-time = "2026-03-15T18:50:24.074Z" },
- { url = "https://files.pythonhosted.org/packages/62/28/ff6f234e628a2de61c458be2779cb182bc03f6eec12200d4a525bbfc9741/charset_normalizer-3.4.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:82060f995ab5003a2d6e0f4ad29065b7672b6593c8c63559beefe5b443242c3e", size = 293582, upload-time = "2026-03-15T18:50:25.454Z" },
- { url = "https://files.pythonhosted.org/packages/1c/b7/b1a117e5385cbdb3205f6055403c2a2a220c5ea80b8716c324eaf75c5c95/charset_normalizer-3.4.6-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:60c74963d8350241a79cb8feea80e54d518f72c26db618862a8f53e5023deaf9", size = 197240, upload-time = "2026-03-15T18:50:27.196Z" },
- { url = "https://files.pythonhosted.org/packages/a1/5f/2574f0f09f3c3bc1b2f992e20bce6546cb1f17e111c5be07308dc5427956/charset_normalizer-3.4.6-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f6e4333fb15c83f7d1482a76d45a0818897b3d33f00efd215528ff7c51b8e35d", size = 217363, upload-time = "2026-03-15T18:50:28.601Z" },
- { url = "https://files.pythonhosted.org/packages/4a/d1/0ae20ad77bc949ddd39b51bf383b6ca932f2916074c95cad34ae465ab71f/charset_normalizer-3.4.6-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:bc72863f4d9aba2e8fd9085e63548a324ba706d2ea2c83b260da08a59b9482de", size = 212994, upload-time = "2026-03-15T18:50:30.102Z" },
- { url = "https://files.pythonhosted.org/packages/60/ac/3233d262a310c1b12633536a07cde5ddd16985e6e7e238e9f3f9423d8eb9/charset_normalizer-3.4.6-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9cc4fc6c196d6a8b76629a70ddfcd4635a6898756e2d9cac5565cf0654605d73", size = 204697, upload-time = "2026-03-15T18:50:31.654Z" },
- { url = "https://files.pythonhosted.org/packages/25/3c/8a18fc411f085b82303cfb7154eed5bd49c77035eb7608d049468b53f87c/charset_normalizer-3.4.6-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:0c173ce3a681f309f31b87125fecec7a5d1347261ea11ebbb856fa6006b23c8c", size = 191673, upload-time = "2026-03-15T18:50:33.433Z" },
- { url = "https://files.pythonhosted.org/packages/ff/a7/11cfe61d6c5c5c7438d6ba40919d0306ed83c9ab957f3d4da2277ff67836/charset_normalizer-3.4.6-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c907cdc8109f6c619e6254212e794d6548373cc40e1ec75e6e3823d9135d29cc", size = 201120, upload-time = "2026-03-15T18:50:35.105Z" },
- { url = "https://files.pythonhosted.org/packages/b5/10/cf491fa1abd47c02f69687046b896c950b92b6cd7337a27e6548adbec8e4/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:404a1e552cf5b675a87f0651f8b79f5f1e6fd100ee88dc612f89aa16abd4486f", size = 200911, upload-time = "2026-03-15T18:50:36.819Z" },
- { url = "https://files.pythonhosted.org/packages/28/70/039796160b48b18ed466fde0af84c1b090c4e288fae26cd674ad04a2d703/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:e3c701e954abf6fc03a49f7c579cc80c2c6cc52525340ca3186c41d3f33482ef", size = 192516, upload-time = "2026-03-15T18:50:38.228Z" },
- { url = "https://files.pythonhosted.org/packages/ff/34/c56f3223393d6ff3124b9e78f7de738047c2d6bc40a4f16ac0c9d7a1cb3c/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7a6967aaf043bceabab5412ed6bd6bd26603dae84d5cb75bf8d9a74a4959d398", size = 218795, upload-time = "2026-03-15T18:50:39.664Z" },
- { url = "https://files.pythonhosted.org/packages/e8/3b/ce2d4f86c5282191a041fdc5a4ce18f1c6bd40a5bd1f74cf8625f08d51c1/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:5feb91325bbceade6afab43eb3b508c63ee53579fe896c77137ded51c6b6958e", size = 201833, upload-time = "2026-03-15T18:50:41.552Z" },
- { url = "https://files.pythonhosted.org/packages/3b/9b/b6a9f76b0fd7c5b5ec58b228ff7e85095370282150f0bd50b3126f5506d6/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:f820f24b09e3e779fe84c3c456cb4108a7aa639b0d1f02c28046e11bfcd088ed", size = 213920, upload-time = "2026-03-15T18:50:43.33Z" },
- { url = "https://files.pythonhosted.org/packages/ae/98/7bc23513a33d8172365ed30ee3a3b3fe1ece14a395e5fc94129541fc6003/charset_normalizer-3.4.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b35b200d6a71b9839a46b9b7fff66b6638bb52fc9658aa58796b0326595d3021", size = 206951, upload-time = "2026-03-15T18:50:44.789Z" },
- { url = "https://files.pythonhosted.org/packages/32/73/c0b86f3d1458468e11aec870e6b3feac931facbe105a894b552b0e518e79/charset_normalizer-3.4.6-cp311-cp311-win32.whl", hash = "sha256:9ca4c0b502ab399ef89248a2c84c54954f77a070f28e546a85e91da627d1301e", size = 143703, upload-time = "2026-03-15T18:50:46.103Z" },
- { url = "https://files.pythonhosted.org/packages/c6/e3/76f2facfe8eddee0bbd38d2594e709033338eae44ebf1738bcefe0a06185/charset_normalizer-3.4.6-cp311-cp311-win_amd64.whl", hash = "sha256:a9e68c9d88823b274cf1e72f28cb5dc89c990edf430b0bfd3e2fb0785bfeabf4", size = 153857, upload-time = "2026-03-15T18:50:47.563Z" },
- { url = "https://files.pythonhosted.org/packages/e2/dc/9abe19c9b27e6cd3636036b9d1b387b78c40dedbf0b47f9366737684b4b0/charset_normalizer-3.4.6-cp311-cp311-win_arm64.whl", hash = "sha256:97d0235baafca5f2b09cf332cc275f021e694e8362c6bb9c96fc9a0eb74fc316", size = 142751, upload-time = "2026-03-15T18:50:49.234Z" },
- { url = "https://files.pythonhosted.org/packages/e5/62/c0815c992c9545347aeea7859b50dc9044d147e2e7278329c6e02ac9a616/charset_normalizer-3.4.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:2ef7fedc7a6ecbe99969cd09632516738a97eeb8bd7258bf8a0f23114c057dab", size = 295154, upload-time = "2026-03-15T18:50:50.88Z" },
- { url = "https://files.pythonhosted.org/packages/a8/37/bdca6613c2e3c58c7421891d80cc3efa1d32e882f7c4a7ee6039c3fc951a/charset_normalizer-3.4.6-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a4ea868bc28109052790eb2b52a9ab33f3aa7adc02f96673526ff47419490e21", size = 199191, upload-time = "2026-03-15T18:50:52.658Z" },
- { url = "https://files.pythonhosted.org/packages/6c/92/9934d1bbd69f7f398b38c5dae1cbf9cc672e7c34a4adf7b17c0a9c17d15d/charset_normalizer-3.4.6-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:836ab36280f21fc1a03c99cd05c6b7af70d2697e374c7af0b61ed271401a72a2", size = 218674, upload-time = "2026-03-15T18:50:54.102Z" },
- { url = "https://files.pythonhosted.org/packages/af/90/25f6ab406659286be929fd89ab0e78e38aa183fc374e03aa3c12d730af8a/charset_normalizer-3.4.6-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f1ce721c8a7dfec21fcbdfe04e8f68174183cf4e8188e0645e92aa23985c57ff", size = 215259, upload-time = "2026-03-15T18:50:55.616Z" },
- { url = "https://files.pythonhosted.org/packages/4e/ef/79a463eb0fff7f96afa04c1d4c51f8fc85426f918db467854bfb6a569ce3/charset_normalizer-3.4.6-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e28d62a8fc7a1fa411c43bd65e346f3bce9716dc51b897fbe930c5987b402d5", size = 207276, upload-time = "2026-03-15T18:50:57.054Z" },
- { url = "https://files.pythonhosted.org/packages/f7/72/d0426afec4b71dc159fa6b4e68f868cd5a3ecd918fec5813a15d292a7d10/charset_normalizer-3.4.6-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:530d548084c4a9f7a16ed4a294d459b4f229db50df689bfe92027452452943a0", size = 195161, upload-time = "2026-03-15T18:50:58.686Z" },
- { url = "https://files.pythonhosted.org/packages/bf/18/c82b06a68bfcb6ce55e508225d210c7e6a4ea122bfc0748892f3dc4e8e11/charset_normalizer-3.4.6-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:30f445ae60aad5e1f8bdbb3108e39f6fbc09f4ea16c815c66578878325f8f15a", size = 203452, upload-time = "2026-03-15T18:51:00.196Z" },
- { url = "https://files.pythonhosted.org/packages/44/d6/0c25979b92f8adafdbb946160348d8d44aa60ce99afdc27df524379875cb/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ac2393c73378fea4e52aa56285a3d64be50f1a12395afef9cce47772f60334c2", size = 202272, upload-time = "2026-03-15T18:51:01.703Z" },
- { url = "https://files.pythonhosted.org/packages/2e/3d/7fea3e8fe84136bebbac715dd1221cc25c173c57a699c030ab9b8900cbb7/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:90ca27cd8da8118b18a52d5f547859cc1f8354a00cd1e8e5120df3e30d6279e5", size = 195622, upload-time = "2026-03-15T18:51:03.526Z" },
- { url = "https://files.pythonhosted.org/packages/57/8a/d6f7fd5cb96c58ef2f681424fbca01264461336d2a7fc875e4446b1f1346/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8e5a94886bedca0f9b78fecd6afb6629142fd2605aa70a125d49f4edc6037ee6", size = 220056, upload-time = "2026-03-15T18:51:05.269Z" },
- { url = "https://files.pythonhosted.org/packages/16/50/478cdda782c8c9c3fb5da3cc72dd7f331f031e7f1363a893cdd6ca0f8de0/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:695f5c2823691a25f17bc5d5ffe79fa90972cc34b002ac6c843bb8a1720e950d", size = 203751, upload-time = "2026-03-15T18:51:06.858Z" },
- { url = "https://files.pythonhosted.org/packages/75/fc/cc2fcac943939c8e4d8791abfa139f685e5150cae9f94b60f12520feaa9b/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:231d4da14bcd9301310faf492051bee27df11f2bc7549bc0bb41fef11b82daa2", size = 216563, upload-time = "2026-03-15T18:51:08.564Z" },
- { url = "https://files.pythonhosted.org/packages/a8/b7/a4add1d9a5f68f3d037261aecca83abdb0ab15960a3591d340e829b37298/charset_normalizer-3.4.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a056d1ad2633548ca18ffa2f85c202cfb48b68615129143915b8dc72a806a923", size = 209265, upload-time = "2026-03-15T18:51:10.312Z" },
- { url = "https://files.pythonhosted.org/packages/6c/18/c094561b5d64a24277707698e54b7f67bd17a4f857bbfbb1072bba07c8bf/charset_normalizer-3.4.6-cp312-cp312-win32.whl", hash = "sha256:c2274ca724536f173122f36c98ce188fd24ce3dad886ec2b7af859518ce008a4", size = 144229, upload-time = "2026-03-15T18:51:11.694Z" },
- { url = "https://files.pythonhosted.org/packages/ab/20/0567efb3a8fd481b8f34f739ebddc098ed062a59fed41a8d193a61939e8f/charset_normalizer-3.4.6-cp312-cp312-win_amd64.whl", hash = "sha256:c8ae56368f8cc97c7e40a7ee18e1cedaf8e780cd8bc5ed5ac8b81f238614facb", size = 154277, upload-time = "2026-03-15T18:51:13.004Z" },
- { url = "https://files.pythonhosted.org/packages/15/57/28d79b44b51933119e21f65479d0864a8d5893e494cf5daab15df0247c17/charset_normalizer-3.4.6-cp312-cp312-win_arm64.whl", hash = "sha256:899d28f422116b08be5118ef350c292b36fc15ec2daeb9ea987c89281c7bb5c4", size = 142817, upload-time = "2026-03-15T18:51:14.408Z" },
- { url = "https://files.pythonhosted.org/packages/1e/1d/4fdabeef4e231153b6ed7567602f3b68265ec4e5b76d6024cf647d43d981/charset_normalizer-3.4.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:11afb56037cbc4b1555a34dd69151e8e069bee82e613a73bef6e714ce733585f", size = 294823, upload-time = "2026-03-15T18:51:15.755Z" },
- { url = "https://files.pythonhosted.org/packages/47/7b/20e809b89c69d37be748d98e84dce6820bf663cf19cf6b942c951a3e8f41/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:423fb7e748a08f854a08a222b983f4df1912b1daedce51a72bd24fe8f26a1843", size = 198527, upload-time = "2026-03-15T18:51:17.177Z" },
- { url = "https://files.pythonhosted.org/packages/37/a6/4f8d27527d59c039dce6f7622593cdcd3d70a8504d87d09eb11e9fdc6062/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d73beaac5e90173ac3deb9928a74763a6d230f494e4bfb422c217a0ad8e629bf", size = 218388, upload-time = "2026-03-15T18:51:18.934Z" },
- { url = "https://files.pythonhosted.org/packages/f6/9b/4770ccb3e491a9bacf1c46cc8b812214fe367c86a96353ccc6daf87b01ec/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d60377dce4511655582e300dc1e5a5f24ba0cb229005a1d5c8d0cb72bb758ab8", size = 214563, upload-time = "2026-03-15T18:51:20.374Z" },
- { url = "https://files.pythonhosted.org/packages/2b/58/a199d245894b12db0b957d627516c78e055adc3a0d978bc7f65ddaf7c399/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:530e8cebeea0d76bdcf93357aa5e41336f48c3dc709ac52da2bb167c5b8271d9", size = 206587, upload-time = "2026-03-15T18:51:21.807Z" },
- { url = "https://files.pythonhosted.org/packages/7e/70/3def227f1ec56f5c69dfc8392b8bd63b11a18ca8178d9211d7cc5e5e4f27/charset_normalizer-3.4.6-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:a26611d9987b230566f24a0a125f17fe0de6a6aff9f25c9f564aaa2721a5fb88", size = 194724, upload-time = "2026-03-15T18:51:23.508Z" },
- { url = "https://files.pythonhosted.org/packages/58/ab/9318352e220c05efd31c2779a23b50969dc94b985a2efa643ed9077bfca5/charset_normalizer-3.4.6-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:34315ff4fc374b285ad7f4a0bf7dcbfe769e1b104230d40f49f700d4ab6bbd84", size = 202956, upload-time = "2026-03-15T18:51:25.239Z" },
- { url = "https://files.pythonhosted.org/packages/75/13/f3550a3ac25b70f87ac98c40d3199a8503676c2f1620efbf8d42095cfc40/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5f8ddd609f9e1af8c7bd6e2aca279c931aefecd148a14402d4e368f3171769fd", size = 201923, upload-time = "2026-03-15T18:51:26.682Z" },
- { url = "https://files.pythonhosted.org/packages/1b/db/c5c643b912740b45e8eec21de1bbab8e7fc085944d37e1e709d3dcd9d72f/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:80d0a5615143c0b3225e5e3ef22c8d5d51f3f72ce0ea6fb84c943546c7b25b6c", size = 195366, upload-time = "2026-03-15T18:51:28.129Z" },
- { url = "https://files.pythonhosted.org/packages/5a/67/3b1c62744f9b2448443e0eb160d8b001c849ec3fef591e012eda6484787c/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:92734d4d8d187a354a556626c221cd1a892a4e0802ccb2af432a1d85ec012194", size = 219752, upload-time = "2026-03-15T18:51:29.556Z" },
- { url = "https://files.pythonhosted.org/packages/f6/98/32ffbaf7f0366ffb0445930b87d103f6b406bc2c271563644bde8a2b1093/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:613f19aa6e082cf96e17e3ffd89383343d0d589abda756b7764cf78361fd41dc", size = 203296, upload-time = "2026-03-15T18:51:30.921Z" },
- { url = "https://files.pythonhosted.org/packages/41/12/5d308c1bbe60cabb0c5ef511574a647067e2a1f631bc8634fcafaccd8293/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:2b1a63e8224e401cafe7739f77efd3f9e7f5f2026bda4aead8e59afab537784f", size = 215956, upload-time = "2026-03-15T18:51:32.399Z" },
- { url = "https://files.pythonhosted.org/packages/53/e9/5f85f6c5e20669dbe56b165c67b0260547dea97dba7e187938833d791687/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6cceb5473417d28edd20c6c984ab6fee6c6267d38d906823ebfe20b03d607dc2", size = 208652, upload-time = "2026-03-15T18:51:34.214Z" },
- { url = "https://files.pythonhosted.org/packages/f1/11/897052ea6af56df3eef3ca94edafee410ca699ca0c7b87960ad19932c55e/charset_normalizer-3.4.6-cp313-cp313-win32.whl", hash = "sha256:d7de2637729c67d67cf87614b566626057e95c303bc0a55ffe391f5205e7003d", size = 143940, upload-time = "2026-03-15T18:51:36.15Z" },
- { url = "https://files.pythonhosted.org/packages/a1/5c/724b6b363603e419829f561c854b87ed7c7e31231a7908708ac086cdf3e2/charset_normalizer-3.4.6-cp313-cp313-win_amd64.whl", hash = "sha256:572d7c822caf521f0525ba1bce1a622a0b85cf47ffbdae6c9c19e3b5ac3c4389", size = 154101, upload-time = "2026-03-15T18:51:37.876Z" },
- { url = "https://files.pythonhosted.org/packages/01/a5/7abf15b4c0968e47020f9ca0935fb3274deb87cb288cd187cad92e8cdffd/charset_normalizer-3.4.6-cp313-cp313-win_arm64.whl", hash = "sha256:a4474d924a47185a06411e0064b803c68be044be2d60e50e8bddcc2649957c1f", size = 143109, upload-time = "2026-03-15T18:51:39.565Z" },
- { url = "https://files.pythonhosted.org/packages/25/6f/ffe1e1259f384594063ea1869bfb6be5cdb8bc81020fc36c3636bc8302a1/charset_normalizer-3.4.6-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:9cc6e6d9e571d2f863fa77700701dae73ed5f78881efc8b3f9a4398772ff53e8", size = 294458, upload-time = "2026-03-15T18:51:41.134Z" },
- { url = "https://files.pythonhosted.org/packages/56/60/09bb6c13a8c1016c2ed5c6a6488e4ffef506461aa5161662bd7636936fb1/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef5960d965e67165d75b7c7ffc60a83ec5abfc5c11b764ec13ea54fbef8b4421", size = 199277, upload-time = "2026-03-15T18:51:42.953Z" },
- { url = "https://files.pythonhosted.org/packages/00/50/dcfbb72a5138bbefdc3332e8d81a23494bf67998b4b100703fd15fa52d81/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b3694e3f87f8ac7ce279d4355645b3c878d24d1424581b46282f24b92f5a4ae2", size = 218758, upload-time = "2026-03-15T18:51:44.339Z" },
- { url = "https://files.pythonhosted.org/packages/03/b3/d79a9a191bb75f5aa81f3aaaa387ef29ce7cb7a9e5074ba8ea095cc073c2/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5d11595abf8dd942a77883a39d81433739b287b6aa71620f15164f8096221b30", size = 215299, upload-time = "2026-03-15T18:51:45.871Z" },
- { url = "https://files.pythonhosted.org/packages/76/7e/bc8911719f7084f72fd545f647601ea3532363927f807d296a8c88a62c0d/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7bda6eebafd42133efdca535b04ccb338ab29467b3f7bf79569883676fc628db", size = 206811, upload-time = "2026-03-15T18:51:47.308Z" },
- { url = "https://files.pythonhosted.org/packages/e2/40/c430b969d41dda0c465aa36cc7c2c068afb67177bef50905ac371b28ccc7/charset_normalizer-3.4.6-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:bbc8c8650c6e51041ad1be191742b8b421d05bbd3410f43fa2a00c8db87678e8", size = 193706, upload-time = "2026-03-15T18:51:48.849Z" },
- { url = "https://files.pythonhosted.org/packages/48/15/e35e0590af254f7df984de1323640ef375df5761f615b6225ba8deb9799a/charset_normalizer-3.4.6-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:22c6f0c2fbc31e76c3b8a86fba1a56eda6166e238c29cdd3d14befdb4a4e4815", size = 202706, upload-time = "2026-03-15T18:51:50.257Z" },
- { url = "https://files.pythonhosted.org/packages/5e/bd/f736f7b9cc5e93a18b794a50346bb16fbfd6b37f99e8f306f7951d27c17c/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7edbed096e4a4798710ed6bc75dcaa2a21b68b6c356553ac4823c3658d53743a", size = 202497, upload-time = "2026-03-15T18:51:52.012Z" },
- { url = "https://files.pythonhosted.org/packages/9d/ba/2cc9e3e7dfdf7760a6ed8da7446d22536f3d0ce114ac63dee2a5a3599e62/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:7f9019c9cb613f084481bd6a100b12e1547cf2efe362d873c2e31e4035a6fa43", size = 193511, upload-time = "2026-03-15T18:51:53.723Z" },
- { url = "https://files.pythonhosted.org/packages/9e/cb/5be49b5f776e5613be07298c80e1b02a2d900f7a7de807230595c85a8b2e/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:58c948d0d086229efc484fe2f30c2d382c86720f55cd9bc33591774348ad44e0", size = 220133, upload-time = "2026-03-15T18:51:55.333Z" },
- { url = "https://files.pythonhosted.org/packages/83/43/99f1b5dad345accb322c80c7821071554f791a95ee50c1c90041c157ae99/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:419a9d91bd238052642a51938af8ac05da5b3343becde08d5cdeab9046df9ee1", size = 203035, upload-time = "2026-03-15T18:51:56.736Z" },
- { url = "https://files.pythonhosted.org/packages/87/9a/62c2cb6a531483b55dddff1a68b3d891a8b498f3ca555fbcf2978e804d9d/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:5273b9f0b5835ff0350c0828faea623c68bfa65b792720c453e22b25cc72930f", size = 216321, upload-time = "2026-03-15T18:51:58.17Z" },
- { url = "https://files.pythonhosted.org/packages/6e/79/94a010ff81e3aec7c293eb82c28f930918e517bc144c9906a060844462eb/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:0e901eb1049fdb80f5bd11ed5ea1e498ec423102f7a9b9e4645d5b8204ff2815", size = 208973, upload-time = "2026-03-15T18:51:59.998Z" },
- { url = "https://files.pythonhosted.org/packages/2a/57/4ecff6d4ec8585342f0c71bc03efaa99cb7468f7c91a57b105bcd561cea8/charset_normalizer-3.4.6-cp314-cp314-win32.whl", hash = "sha256:b4ff1d35e8c5bd078be89349b6f3a845128e685e751b6ea1169cf2160b344c4d", size = 144610, upload-time = "2026-03-15T18:52:02.213Z" },
- { url = "https://files.pythonhosted.org/packages/80/94/8434a02d9d7f168c25767c64671fead8d599744a05d6a6c877144c754246/charset_normalizer-3.4.6-cp314-cp314-win_amd64.whl", hash = "sha256:74119174722c4349af9708993118581686f343adc1c8c9c007d59be90d077f3f", size = 154962, upload-time = "2026-03-15T18:52:03.658Z" },
- { url = "https://files.pythonhosted.org/packages/46/4c/48f2cdbfd923026503dfd67ccea45c94fd8fe988d9056b468579c66ed62b/charset_normalizer-3.4.6-cp314-cp314-win_arm64.whl", hash = "sha256:e5bcc1a1ae744e0bb59641171ae53743760130600da8db48cbb6e4918e186e4e", size = 143595, upload-time = "2026-03-15T18:52:05.123Z" },
- { url = "https://files.pythonhosted.org/packages/31/93/8878be7569f87b14f1d52032946131bcb6ebbd8af3e20446bc04053dc3f1/charset_normalizer-3.4.6-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:ad8faf8df23f0378c6d527d8b0b15ea4a2e23c89376877c598c4870d1b2c7866", size = 314828, upload-time = "2026-03-15T18:52:06.831Z" },
- { url = "https://files.pythonhosted.org/packages/06/b6/fae511ca98aac69ecc35cde828b0a3d146325dd03d99655ad38fc2cc3293/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f5ea69428fa1b49573eef0cc44a1d43bebd45ad0c611eb7d7eac760c7ae771bc", size = 208138, upload-time = "2026-03-15T18:52:08.239Z" },
- { url = "https://files.pythonhosted.org/packages/54/57/64caf6e1bf07274a1e0b7c160a55ee9e8c9ec32c46846ce59b9c333f7008/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:06a7e86163334edfc5d20fe104db92fcd666e5a5df0977cb5680a506fe26cc8e", size = 224679, upload-time = "2026-03-15T18:52:10.043Z" },
- { url = "https://files.pythonhosted.org/packages/aa/cb/9ff5a25b9273ef160861b41f6937f86fae18b0792fe0a8e75e06acb08f1d/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e1f6e2f00a6b8edb562826e4632e26d063ac10307e80f7461f7de3ad8ef3f077", size = 223475, upload-time = "2026-03-15T18:52:11.854Z" },
- { url = "https://files.pythonhosted.org/packages/fc/97/440635fc093b8d7347502a377031f9605a1039c958f3cd18dcacffb37743/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:95b52c68d64c1878818687a473a10547b3292e82b6f6fe483808fb1468e2f52f", size = 215230, upload-time = "2026-03-15T18:52:13.325Z" },
- { url = "https://files.pythonhosted.org/packages/cd/24/afff630feb571a13f07c8539fbb502d2ab494019492aaffc78ef41f1d1d0/charset_normalizer-3.4.6-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:7504e9b7dc05f99a9bbb4525c67a2c155073b44d720470a148b34166a69c054e", size = 199045, upload-time = "2026-03-15T18:52:14.752Z" },
- { url = "https://files.pythonhosted.org/packages/e5/17/d1399ecdaf7e0498c327433e7eefdd862b41236a7e484355b8e0e5ebd64b/charset_normalizer-3.4.6-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:172985e4ff804a7ad08eebec0a1640ece87ba5041d565fff23c8f99c1f389484", size = 211658, upload-time = "2026-03-15T18:52:16.278Z" },
- { url = "https://files.pythonhosted.org/packages/b5/38/16baa0affb957b3d880e5ac2144caf3f9d7de7bc4a91842e447fbb5e8b67/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:4be9f4830ba8741527693848403e2c457c16e499100963ec711b1c6f2049b7c7", size = 210769, upload-time = "2026-03-15T18:52:17.782Z" },
- { url = "https://files.pythonhosted.org/packages/05/34/c531bc6ac4c21da9ddfddb3107be2287188b3ea4b53b70fc58f2a77ac8d8/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:79090741d842f564b1b2827c0b82d846405b744d31e84f18d7a7b41c20e473ff", size = 201328, upload-time = "2026-03-15T18:52:19.553Z" },
- { url = "https://files.pythonhosted.org/packages/fa/73/a5a1e9ca5f234519c1953608a03fe109c306b97fdfb25f09182babad51a7/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:87725cfb1a4f1f8c2fc9890ae2f42094120f4b44db9360be5d99a4c6b0e03a9e", size = 225302, upload-time = "2026-03-15T18:52:21.043Z" },
- { url = "https://files.pythonhosted.org/packages/ba/f6/cd782923d112d296294dea4bcc7af5a7ae0f86ab79f8fefbda5526b6cfc0/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:fcce033e4021347d80ed9c66dcf1e7b1546319834b74445f561d2e2221de5659", size = 211127, upload-time = "2026-03-15T18:52:22.491Z" },
- { url = "https://files.pythonhosted.org/packages/0e/c5/0b6898950627af7d6103a449b22320372c24c6feda91aa24e201a478d161/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:ca0276464d148c72defa8bb4390cce01b4a0e425f3b50d1435aa6d7a18107602", size = 222840, upload-time = "2026-03-15T18:52:24.113Z" },
- { url = "https://files.pythonhosted.org/packages/7d/25/c4bba773bef442cbdc06111d40daa3de5050a676fa26e85090fc54dd12f0/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:197c1a244a274bb016dd8b79204850144ef77fe81c5b797dc389327adb552407", size = 216890, upload-time = "2026-03-15T18:52:25.541Z" },
- { url = "https://files.pythonhosted.org/packages/35/1a/05dacadb0978da72ee287b0143097db12f2e7e8d3ffc4647da07a383b0b7/charset_normalizer-3.4.6-cp314-cp314t-win32.whl", hash = "sha256:2a24157fa36980478dd1770b585c0f30d19e18f4fb0c47c13aa568f871718579", size = 155379, upload-time = "2026-03-15T18:52:27.05Z" },
- { url = "https://files.pythonhosted.org/packages/5d/7a/d269d834cb3a76291651256f3b9a5945e81d0a49ab9f4a498964e83c0416/charset_normalizer-3.4.6-cp314-cp314t-win_amd64.whl", hash = "sha256:cd5e2801c89992ed8c0a3f0293ae83c159a60d9a5d685005383ef4caca77f2c4", size = 169043, upload-time = "2026-03-15T18:52:28.502Z" },
- { url = "https://files.pythonhosted.org/packages/23/06/28b29fba521a37a8932c6a84192175c34d49f84a6d4773fa63d05f9aff22/charset_normalizer-3.4.6-cp314-cp314t-win_arm64.whl", hash = "sha256:47955475ac79cc504ef2704b192364e51d0d473ad452caedd0002605f780101c", size = 148523, upload-time = "2026-03-15T18:52:29.956Z" },
- { url = "https://files.pythonhosted.org/packages/2a/68/687187c7e26cb24ccbd88e5069f5ef00eba804d36dde11d99aad0838ab45/charset_normalizer-3.4.6-py3-none-any.whl", hash = "sha256:947cf925bc916d90adba35a64c82aace04fa39b46b52d4630ece166655905a69", size = 61455, upload-time = "2026-03-15T18:53:23.833Z" },
+version = "3.4.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/1f/b8/6d51fc1d52cbd52cd4ccedd5b5b2f0f6a11bbf6765c782298b0f3e808541/charset_normalizer-3.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e824f1492727fa856dd6eda4f7cee25f8518a12f3c4a56a74e8095695089cf6d", size = 209709, upload-time = "2025-10-14T04:40:11.385Z" },
+ { url = "https://files.pythonhosted.org/packages/5c/af/1f9d7f7faafe2ddfb6f72a2e07a548a629c61ad510fe60f9630309908fef/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4bd5d4137d500351a30687c2d3971758aac9a19208fc110ccb9d7188fbe709e8", size = 148814, upload-time = "2025-10-14T04:40:13.135Z" },
+ { url = "https://files.pythonhosted.org/packages/79/3d/f2e3ac2bbc056ca0c204298ea4e3d9db9b4afe437812638759db2c976b5f/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:027f6de494925c0ab2a55eab46ae5129951638a49a34d87f4c3eda90f696b4ad", size = 144467, upload-time = "2025-10-14T04:40:14.728Z" },
+ { url = "https://files.pythonhosted.org/packages/ec/85/1bf997003815e60d57de7bd972c57dc6950446a3e4ccac43bc3070721856/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f820802628d2694cb7e56db99213f930856014862f3fd943d290ea8438d07ca8", size = 162280, upload-time = "2025-10-14T04:40:16.14Z" },
+ { url = "https://files.pythonhosted.org/packages/3e/8e/6aa1952f56b192f54921c436b87f2aaf7c7a7c3d0d1a765547d64fd83c13/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:798d75d81754988d2565bff1b97ba5a44411867c0cf32b77a7e8f8d84796b10d", size = 159454, upload-time = "2025-10-14T04:40:17.567Z" },
+ { url = "https://files.pythonhosted.org/packages/36/3b/60cbd1f8e93aa25d1c669c649b7a655b0b5fb4c571858910ea9332678558/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d1bb833febdff5c8927f922386db610b49db6e0d4f4ee29601d71e7c2694313", size = 153609, upload-time = "2025-10-14T04:40:19.08Z" },
+ { url = "https://files.pythonhosted.org/packages/64/91/6a13396948b8fd3c4b4fd5bc74d045f5637d78c9675585e8e9fbe5636554/charset_normalizer-3.4.4-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9cd98cdc06614a2f768d2b7286d66805f94c48cde050acdbbb7db2600ab3197e", size = 151849, upload-time = "2025-10-14T04:40:20.607Z" },
+ { url = "https://files.pythonhosted.org/packages/b7/7a/59482e28b9981d105691e968c544cc0df3b7d6133152fb3dcdc8f135da7a/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:077fbb858e903c73f6c9db43374fd213b0b6a778106bc7032446a8e8b5b38b93", size = 151586, upload-time = "2025-10-14T04:40:21.719Z" },
+ { url = "https://files.pythonhosted.org/packages/92/59/f64ef6a1c4bdd2baf892b04cd78792ed8684fbc48d4c2afe467d96b4df57/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:244bfb999c71b35de57821b8ea746b24e863398194a4014e4c76adc2bbdfeff0", size = 145290, upload-time = "2025-10-14T04:40:23.069Z" },
+ { url = "https://files.pythonhosted.org/packages/6b/63/3bf9f279ddfa641ffa1962b0db6a57a9c294361cc2f5fcac997049a00e9c/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:64b55f9dce520635f018f907ff1b0df1fdc31f2795a922fb49dd14fbcdf48c84", size = 163663, upload-time = "2025-10-14T04:40:24.17Z" },
+ { url = "https://files.pythonhosted.org/packages/ed/09/c9e38fc8fa9e0849b172b581fd9803bdf6e694041127933934184e19f8c3/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:faa3a41b2b66b6e50f84ae4a68c64fcd0c44355741c6374813a800cd6695db9e", size = 151964, upload-time = "2025-10-14T04:40:25.368Z" },
+ { url = "https://files.pythonhosted.org/packages/d2/d1/d28b747e512d0da79d8b6a1ac18b7ab2ecfd81b2944c4c710e166d8dd09c/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6515f3182dbe4ea06ced2d9e8666d97b46ef4c75e326b79bb624110f122551db", size = 161064, upload-time = "2025-10-14T04:40:26.806Z" },
+ { url = "https://files.pythonhosted.org/packages/bb/9a/31d62b611d901c3b9e5500c36aab0ff5eb442043fb3a1c254200d3d397d9/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cc00f04ed596e9dc0da42ed17ac5e596c6ccba999ba6bd92b0e0aef2f170f2d6", size = 155015, upload-time = "2025-10-14T04:40:28.284Z" },
+ { url = "https://files.pythonhosted.org/packages/1f/f3/107e008fa2bff0c8b9319584174418e5e5285fef32f79d8ee6a430d0039c/charset_normalizer-3.4.4-cp310-cp310-win32.whl", hash = "sha256:f34be2938726fc13801220747472850852fe6b1ea75869a048d6f896838c896f", size = 99792, upload-time = "2025-10-14T04:40:29.613Z" },
+ { url = "https://files.pythonhosted.org/packages/eb/66/e396e8a408843337d7315bab30dbf106c38966f1819f123257f5520f8a96/charset_normalizer-3.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:a61900df84c667873b292c3de315a786dd8dac506704dea57bc957bd31e22c7d", size = 107198, upload-time = "2025-10-14T04:40:30.644Z" },
+ { url = "https://files.pythonhosted.org/packages/b5/58/01b4f815bf0312704c267f2ccb6e5d42bcc7752340cd487bc9f8c3710597/charset_normalizer-3.4.4-cp310-cp310-win_arm64.whl", hash = "sha256:cead0978fc57397645f12578bfd2d5ea9138ea0fac82b2f63f7f7c6877986a69", size = 100262, upload-time = "2025-10-14T04:40:32.108Z" },
+ { url = "https://files.pythonhosted.org/packages/ed/27/c6491ff4954e58a10f69ad90aca8a1b6fe9c5d3c6f380907af3c37435b59/charset_normalizer-3.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8", size = 206988, upload-time = "2025-10-14T04:40:33.79Z" },
+ { url = "https://files.pythonhosted.org/packages/94/59/2e87300fe67ab820b5428580a53cad894272dbb97f38a7a814a2a1ac1011/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0", size = 147324, upload-time = "2025-10-14T04:40:34.961Z" },
+ { url = "https://files.pythonhosted.org/packages/07/fb/0cf61dc84b2b088391830f6274cb57c82e4da8bbc2efeac8c025edb88772/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3", size = 142742, upload-time = "2025-10-14T04:40:36.105Z" },
+ { url = "https://files.pythonhosted.org/packages/62/8b/171935adf2312cd745d290ed93cf16cf0dfe320863ab7cbeeae1dcd6535f/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc", size = 160863, upload-time = "2025-10-14T04:40:37.188Z" },
+ { url = "https://files.pythonhosted.org/packages/09/73/ad875b192bda14f2173bfc1bc9a55e009808484a4b256748d931b6948442/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897", size = 157837, upload-time = "2025-10-14T04:40:38.435Z" },
+ { url = "https://files.pythonhosted.org/packages/6d/fc/de9cce525b2c5b94b47c70a4b4fb19f871b24995c728e957ee68ab1671ea/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381", size = 151550, upload-time = "2025-10-14T04:40:40.053Z" },
+ { url = "https://files.pythonhosted.org/packages/55/c2/43edd615fdfba8c6f2dfbd459b25a6b3b551f24ea21981e23fb768503ce1/charset_normalizer-3.4.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815", size = 149162, upload-time = "2025-10-14T04:40:41.163Z" },
+ { url = "https://files.pythonhosted.org/packages/03/86/bde4ad8b4d0e9429a4e82c1e8f5c659993a9a863ad62c7df05cf7b678d75/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0", size = 150019, upload-time = "2025-10-14T04:40:42.276Z" },
+ { url = "https://files.pythonhosted.org/packages/1f/86/a151eb2af293a7e7bac3a739b81072585ce36ccfb4493039f49f1d3cae8c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161", size = 143310, upload-time = "2025-10-14T04:40:43.439Z" },
+ { url = "https://files.pythonhosted.org/packages/b5/fe/43dae6144a7e07b87478fdfc4dbe9efd5defb0e7ec29f5f58a55aeef7bf7/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4", size = 162022, upload-time = "2025-10-14T04:40:44.547Z" },
+ { url = "https://files.pythonhosted.org/packages/80/e6/7aab83774f5d2bca81f42ac58d04caf44f0cc2b65fc6db2b3b2e8a05f3b3/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89", size = 149383, upload-time = "2025-10-14T04:40:46.018Z" },
+ { url = "https://files.pythonhosted.org/packages/4f/e8/b289173b4edae05c0dde07f69f8db476a0b511eac556dfe0d6bda3c43384/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569", size = 159098, upload-time = "2025-10-14T04:40:47.081Z" },
+ { url = "https://files.pythonhosted.org/packages/d8/df/fe699727754cae3f8478493c7f45f777b17c3ef0600e28abfec8619eb49c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224", size = 152991, upload-time = "2025-10-14T04:40:48.246Z" },
+ { url = "https://files.pythonhosted.org/packages/1a/86/584869fe4ddb6ffa3bd9f491b87a01568797fb9bd8933f557dba9771beaf/charset_normalizer-3.4.4-cp311-cp311-win32.whl", hash = "sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a", size = 99456, upload-time = "2025-10-14T04:40:49.376Z" },
+ { url = "https://files.pythonhosted.org/packages/65/f6/62fdd5feb60530f50f7e38b4f6a1d5203f4d16ff4f9f0952962c044e919a/charset_normalizer-3.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016", size = 106978, upload-time = "2025-10-14T04:40:50.844Z" },
+ { url = "https://files.pythonhosted.org/packages/7a/9d/0710916e6c82948b3be62d9d398cb4fcf4e97b56d6a6aeccd66c4b2f2bd5/charset_normalizer-3.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1", size = 99969, upload-time = "2025-10-14T04:40:52.272Z" },
+ { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" },
+ { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" },
+ { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" },
+ { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload-time = "2025-10-14T04:40:57.217Z" },
+ { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload-time = "2025-10-14T04:40:58.358Z" },
+ { url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload-time = "2025-10-14T04:40:59.468Z" },
+ { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload-time = "2025-10-14T04:41:00.623Z" },
+ { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload-time = "2025-10-14T04:41:01.754Z" },
+ { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload-time = "2025-10-14T04:41:03.231Z" },
+ { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload-time = "2025-10-14T04:41:04.715Z" },
+ { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload-time = "2025-10-14T04:41:05.827Z" },
+ { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload-time = "2025-10-14T04:41:06.938Z" },
+ { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload-time = "2025-10-14T04:41:08.101Z" },
+ { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload-time = "2025-10-14T04:41:09.23Z" },
+ { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131, upload-time = "2025-10-14T04:41:10.467Z" },
+ { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload-time = "2025-10-14T04:41:11.915Z" },
+ { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091, upload-time = "2025-10-14T04:41:13.346Z" },
+ { url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936, upload-time = "2025-10-14T04:41:14.461Z" },
+ { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180, upload-time = "2025-10-14T04:41:15.588Z" },
+ { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346, upload-time = "2025-10-14T04:41:16.738Z" },
+ { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874, upload-time = "2025-10-14T04:41:17.923Z" },
+ { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076, upload-time = "2025-10-14T04:41:19.106Z" },
+ { url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601, upload-time = "2025-10-14T04:41:20.245Z" },
+ { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376, upload-time = "2025-10-14T04:41:21.398Z" },
+ { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825, upload-time = "2025-10-14T04:41:22.583Z" },
+ { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583, upload-time = "2025-10-14T04:41:23.754Z" },
+ { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366, upload-time = "2025-10-14T04:41:25.27Z" },
+ { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300, upload-time = "2025-10-14T04:41:26.725Z" },
+ { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465, upload-time = "2025-10-14T04:41:28.322Z" },
+ { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404, upload-time = "2025-10-14T04:41:29.95Z" },
+ { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092, upload-time = "2025-10-14T04:41:31.188Z" },
+ { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408, upload-time = "2025-10-14T04:41:32.624Z" },
+ { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746, upload-time = "2025-10-14T04:41:33.773Z" },
+ { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889, upload-time = "2025-10-14T04:41:34.897Z" },
+ { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641, upload-time = "2025-10-14T04:41:36.116Z" },
+ { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779, upload-time = "2025-10-14T04:41:37.229Z" },
+ { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035, upload-time = "2025-10-14T04:41:38.368Z" },
+ { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542, upload-time = "2025-10-14T04:41:39.862Z" },
+ { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524, upload-time = "2025-10-14T04:41:41.319Z" },
+ { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395, upload-time = "2025-10-14T04:41:42.539Z" },
+ { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680, upload-time = "2025-10-14T04:41:43.661Z" },
+ { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045, upload-time = "2025-10-14T04:41:44.821Z" },
+ { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687, upload-time = "2025-10-14T04:41:46.442Z" },
+ { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014, upload-time = "2025-10-14T04:41:47.631Z" },
+ { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044, upload-time = "2025-10-14T04:41:48.81Z" },
+ { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940, upload-time = "2025-10-14T04:41:49.946Z" },
+ { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104, upload-time = "2025-10-14T04:41:51.051Z" },
+ { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743, upload-time = "2025-10-14T04:41:52.122Z" },
+ { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" },
]
[[package]]
@@ -1139,10 +1152,10 @@ wheels = [
[[package]]
name = "cuda-pathfinder"
-version = "1.4.2"
+version = "1.3.5"
source = { registry = "https://pypi.org/simple" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/92/de/8ca2b613042550dcf9ef50c596c8b1f602afda92cf9032ac28a73f6ee410/cuda_pathfinder-1.4.2-py3-none-any.whl", hash = "sha256:eb354abc20278f8609dc5b666a24648655bef5613c6dfe78a238a6fd95566754", size = 44779, upload-time = "2026-03-10T21:57:30.974Z" },
+ { url = "https://files.pythonhosted.org/packages/ca/b5/e4056e4058fb56519fcddf1face6fe3ff2398953b41615fafe9fb1540bf2/cuda_pathfinder-1.3.5-py3-none-any.whl", hash = "sha256:6c88220f8637cb35d2a75c620d72efebf683b248b923713d8fbe235844c1a4b9", size = 33711, upload-time = "2026-02-23T18:34:27.253Z" },
]
[[package]]
@@ -1196,71 +1209,29 @@ wheels = [
[[package]]
name = "datasets"
-version = "2.2.1"
+version = "4.5.0"
source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
- "python_full_version >= '3.14' and sys_platform == 'win32'",
-]
dependencies = [
- { name = "aiohttp", marker = "python_full_version >= '3.14' and sys_platform == 'win32'" },
- { name = "dill", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14' and sys_platform == 'win32'" },
- { name = "fsspec", extra = ["http"], marker = "(python_full_version >= '3.14' and sys_platform == 'win32' and extra == 'extra-13-megatron-core-lts') or (python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "huggingface-hub", marker = "python_full_version >= '3.14' and sys_platform == 'win32'" },
- { name = "multiprocess", version = "0.70.19", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14' and sys_platform == 'win32'" },
- { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14' and sys_platform == 'win32'" },
- { name = "packaging", marker = "python_full_version >= '3.14' and sys_platform == 'win32'" },
- { name = "pandas", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14' and sys_platform == 'win32'" },
- { name = "pyarrow", marker = "python_full_version >= '3.14' and sys_platform == 'win32'" },
- { name = "requests", marker = "python_full_version >= '3.14' and sys_platform == 'win32'" },
- { name = "responses", marker = "python_full_version >= '3.14' and sys_platform == 'win32'" },
- { name = "tqdm", marker = "python_full_version >= '3.14' and sys_platform == 'win32'" },
- { name = "xxhash", marker = "python_full_version >= '3.14' and sys_platform == 'win32'" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/31/64/1e6fb2a0eb6b0d55117233cf33279ba6d680c0f031ebae81281a47c92760/datasets-2.2.1.tar.gz", hash = "sha256:d362717c4394589b516c8f397ff20a6fe720454aed877ab61d06f3bc05df9544", size = 302132, upload-time = "2022-05-11T17:02:29.543Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/d7/2d/41e8aec8d4bad6f07adfcbc89cf743e0d31c876371d453b2936bcfa7fe34/datasets-2.2.1-py3-none-any.whl", hash = "sha256:1938f3e99599422de50b9b54fe802aca854ed130382dab0b3820c821f7ae6d5e", size = 342193, upload-time = "2022-05-11T17:02:27.047Z" },
-]
-
-[[package]]
-name = "datasets"
-version = "4.7.0"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
- "python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.13.*' and sys_platform == 'win32'",
- "python_full_version == '3.12.*' and sys_platform == 'win32'",
- "python_full_version == '3.13.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.11.*' and sys_platform == 'win32'",
- "python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version < '3.11'",
- "python_full_version >= '3.14' and sys_platform == 'win32'",
-]
-dependencies = [
- { name = "dill", version = "0.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'win32'" },
- { name = "filelock", marker = "python_full_version < '3.14' or sys_platform != 'win32'" },
- { name = "fsspec", extra = ["http"], marker = "(python_full_version < '3.14' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'win32' and extra == 'extra-13-megatron-core-lts') or extra == 'extra-13-megatron-core-dev'" },
- { name = "httpx", marker = "python_full_version < '3.14' or sys_platform != 'win32'" },
- { name = "huggingface-hub", marker = "python_full_version < '3.14' or sys_platform != 'win32'" },
- { name = "multiprocess", version = "0.70.18", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'win32'" },
+ { name = "dill", version = "0.4.0", source = { registry = "https://pypi.org/simple" } },
+ { name = "filelock" },
+ { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, extra = ["http"], marker = "extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" },
+ { name = "httpx" },
+ { name = "huggingface-hub" },
+ { name = "multiprocess" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and python_full_version < '3.14' and extra == 'extra-13-megatron-core-lts') or (python_full_version >= '3.11' and sys_platform != 'win32' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and sys_platform != 'win32' and extra == 'extra-13-megatron-core-lts') or (python_full_version >= '3.14' and sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "packaging", marker = "python_full_version < '3.14' or sys_platform != 'win32'" },
+ { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "packaging" },
{ name = "pandas", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "pandas", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and python_full_version < '3.14' and extra == 'extra-13-megatron-core-lts') or (python_full_version >= '3.11' and sys_platform != 'win32' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and sys_platform != 'win32' and extra == 'extra-13-megatron-core-lts') or (python_full_version >= '3.14' and sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "pyarrow", marker = "python_full_version < '3.14' or sys_platform != 'win32'" },
- { name = "pyyaml", marker = "python_full_version < '3.14' or sys_platform != 'win32'" },
- { name = "requests", marker = "python_full_version < '3.14' or sys_platform != 'win32'" },
- { name = "tqdm", marker = "python_full_version < '3.14' or sys_platform != 'win32'" },
- { name = "xxhash", marker = "python_full_version < '3.14' or sys_platform != 'win32'" },
+ { name = "pandas", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "pyarrow" },
+ { name = "pyyaml" },
+ { name = "requests" },
+ { name = "tqdm" },
+ { name = "xxhash" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/1c/9c/ba18de0b70858533e422ed6cfe0e46789473cef7fc7fc3653e23fa494730/datasets-4.7.0.tar.gz", hash = "sha256:4984cdfc65d04464da7f95205a55cb50515fd94ae3176caacb50a1b7273792e2", size = 602008, upload-time = "2026-03-09T19:01:49.298Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/55/bf/bb927bde63d649296c83e883171ae77074717c1b80fe2868b328bd0dbcbb/datasets-4.5.0.tar.gz", hash = "sha256:00c698ce1c2452e646cc5fad47fef39d3fe78dd650a8a6eb205bb45eb63cd500", size = 588384, upload-time = "2026-01-14T18:27:54.297Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/1e/03/c6d9c3119cf712f638fe763e887ecaac6acbb62bf1e2acc3cbde0df340fd/datasets-4.7.0-py3-none-any.whl", hash = "sha256:d5fe3025ec6acc3b5649f10d5576dff5e054134927604e6913c1467a04adc3c2", size = 527530, upload-time = "2026-03-09T19:01:47.443Z" },
+ { url = "https://files.pythonhosted.org/packages/fc/d5/0d563ea3c205eee226dc8053cf7682a8ac588db8acecd0eda2b587987a0b/datasets-4.5.0-py3-none-any.whl", hash = "sha256:b5d7e08096ffa407dd69e58b1c0271c9b2506140839b8d99af07375ad31b6726", size = 515196, upload-time = "2026-01-14T18:27:52.419Z" },
]
[[package]]
@@ -1286,7 +1257,8 @@ name = "deprecated"
version = "1.3.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "wrapt" },
+ { name = "wrapt", version = "1.17.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" },
+ { name = "wrapt", version = "2.1.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
]
sdist = { url = "https://files.pythonhosted.org/packages/49/85/12f0a49a7c4ffb70572b6c2ef13c90c88fd190debda93b23f026b25f9634/deprecated-1.3.1.tar.gz", hash = "sha256:b1b50e0ff0c1fddaa5708a2c6b0a6588bb09b892825ab2b214ac9ea9d92a5223", size = 2932523, upload-time = "2025-10-30T08:19:02.757Z" }
wheels = [
@@ -1298,19 +1270,24 @@ name = "dill"
version = "0.4.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
+ "python_full_version >= '3.14' and sys_platform == 'win32'",
"python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'win32'",
"python_full_version == '3.12.*' and sys_platform == 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'emscripten'",
"python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
"python_full_version == '3.11.*' and sys_platform == 'win32'",
"python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version < '3.11'",
- "python_full_version >= '3.14' and sys_platform == 'win32'",
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
]
sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" }
wheels = [
@@ -1322,7 +1299,7 @@ name = "dill"
version = "0.4.1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
- "python_full_version >= '3.14' and sys_platform == 'win32'",
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
]
sdist = { url = "https://files.pythonhosted.org/packages/81/e1/56027a71e31b02ddc53c7d65b01e68edf64dea2932122fe7746a516f75d5/dill-0.4.1.tar.gz", hash = "sha256:423092df4182177d4d8ba8290c8a5b640c66ab35ec7da59ccfa00f6fa3eea5fa", size = 187315, upload-time = "2026-01-19T02:36:56.85Z" }
wheels = [
@@ -1375,7 +1352,8 @@ name = "docutils"
version = "0.21.2"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
- "python_full_version < '3.11'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
]
sdist = { url = "https://files.pythonhosted.org/packages/ae/ed/aefcc8cd0ba62a0560c3c18c33925362d46c6075480bfa4df87b28e169a9/docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f", size = 2204444, upload-time = "2024-04-23T18:57:18.24Z" }
wheels = [
@@ -1387,18 +1365,22 @@ name = "docutils"
version = "0.22.4"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
"python_full_version >= '3.14' and sys_platform == 'win32'",
"python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'win32'",
"python_full_version == '3.12.*' and sys_platform == 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'emscripten'",
"python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
"python_full_version == '3.11.*' and sys_platform == 'win32'",
"python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
]
sdist = { url = "https://files.pythonhosted.org/packages/ae/b6/03bb70946330e88ffec97aefd3ea75ba575cb2e762061e0e62a213befee8/docutils-0.22.4.tar.gz", hash = "sha256:4db53b1fde9abecbb74d91230d32ab626d94f6badfc575d6db9194a49df29968", size = 2291750, upload-time = "2025-12-18T19:00:26.443Z" }
wheels = [
@@ -1425,12 +1407,11 @@ wheels = [
[[package]]
name = "emerging-optimizers"
-version = "0.1.0"
-source = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0#d5363b4a418128cd8111983b191c4b8869a9766b" }
+version = "0.2.0"
+source = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.2.0#1effa026ff096b7fa1063ca2fba19d98be6e6cdf" }
dependencies = [
- { name = "absl-py" },
- { name = "torch", marker = "sys_platform == 'never'" },
- { name = "typing-extensions" },
+ { name = "absl-py", marker = "python_full_version >= '3.12'" },
+ { name = "torch", marker = "python_full_version >= '3.12' and sys_platform == 'never'" },
]
[[package]]
@@ -1460,9 +1441,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d6/1f/e99e23ee01847147fa194e8d41cfcf2535a2dbfcb51414c541cadb15c5d7/fabric-3.2.2-py3-none-any.whl", hash = "sha256:91c47c0be68b14936c88b34da8a1f55e5710fd28397dac5d4ff2e21558113a6f", size = 59417, upload-time = "2023-08-31T01:42:03.917Z" },
]
+[[package]]
+name = "fast-hadamard-transform"
+version = "1.0.4.post1"
+source = { git = "https://github.com/Dao-AILab/fast-hadamard-transform.git?rev=f134af63deb2df17e1171a9ec1ea4a7d8604d5ca#f134af63deb2df17e1171a9ec1ea4a7d8604d5ca" }
+dependencies = [
+ { name = "ninja" },
+ { name = "packaging" },
+ { name = "torch", marker = "sys_platform == 'never' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+]
+
[[package]]
name = "fastapi"
-version = "0.135.1"
+version = "0.133.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "annotated-doc" },
@@ -1471,9 +1462,9 @@ dependencies = [
{ name = "typing-extensions" },
{ name = "typing-inspection" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/e7/7b/f8e0211e9380f7195ba3f3d40c292594fd81ba8ec4629e3854c353aaca45/fastapi-0.135.1.tar.gz", hash = "sha256:d04115b508d936d254cea545b7312ecaa58a7b3a0f84952535b4c9afae7668cd", size = 394962, upload-time = "2026-03-01T18:18:29.369Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c2/04/ab382c7c03dd545f2c964d06e87ad0d5faa944a2434186ad9c285f5d87e0/fastapi-0.133.0.tar.gz", hash = "sha256:b900a2bf5685cdb0647a41d5900bdeafc3a9e8a28ac08c6246b76699e164d60d", size = 373265, upload-time = "2026-02-24T09:53:40.143Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/e4/72/42e900510195b23a56bde950d26a51f8b723846bfcaa0286e90287f0422b/fastapi-0.135.1-py3-none-any.whl", hash = "sha256:46e2fc5745924b7c840f71ddd277382af29ce1cdb7d5eab5bf697e3fb9999c9e", size = 116999, upload-time = "2026-03-01T18:18:30.831Z" },
+ { url = "https://files.pythonhosted.org/packages/bf/b4/023e75a2ec3f5440e380df6caf4d28edc0806d007193e6fb0707237886a4/fastapi-0.133.0-py3-none-any.whl", hash = "sha256:0a78878483d60702a1dde864c24ab349a1a53ef4db6b6f74f8cd4a2b2bc67d2f", size = 104787, upload-time = "2026-02-24T09:53:41.404Z" },
]
[[package]]
@@ -1493,11 +1484,11 @@ wheels = [
[[package]]
name = "filelock"
-version = "3.25.2"
+version = "3.24.3"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/94/b8/00651a0f559862f3bb7d6f7477b192afe3f583cc5e26403b44e59a55ab34/filelock-3.25.2.tar.gz", hash = "sha256:b64ece2b38f4ca29dd3e810287aa8c48182bbecd1ae6e9ae126c9b35f1382694", size = 40480, upload-time = "2026-03-11T20:45:38.487Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/73/92/a8e2479937ff39185d20dd6a851c1a63e55849e447a55e798cc2e1f49c65/filelock-3.24.3.tar.gz", hash = "sha256:011a5644dc937c22699943ebbfc46e969cdde3e171470a6e40b9533e5a72affa", size = 37935, upload-time = "2026-02-19T00:48:20.543Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/a4/a5/842ae8f0c08b61d6484b52f99a03510a3a72d23141942d216ebe81fefbce/filelock-3.25.2-py3-none-any.whl", hash = "sha256:ca8afb0da15f229774c9ad1b455ed96e85a81373065fb10446672f64444ddf70", size = 26759, upload-time = "2026-03-11T20:45:37.437Z" },
+ { url = "https://files.pythonhosted.org/packages/9c/0f/5d0c71a1aefeb08efff26272149e07ab922b64f46c63363756224bd6872e/filelock-3.24.3-py3-none-any.whl", hash = "sha256:426e9a4660391f7f8a810d71b0555bce9008b0a1cc342ab1f6947d37639e002d", size = 24331, upload-time = "2026-02-19T00:48:18.465Z" },
]
[[package]]
@@ -1511,15 +1502,15 @@ wheels = [
[[package]]
name = "fla-core"
-version = "0.4.2"
+version = "0.4.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "einops" },
{ name = "torch", marker = "sys_platform == 'never'" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/53/f9/9e05c48f92b1388a8a357141eb557ed0dd6d4bb936e1d05d35f01976657f/fla_core-0.4.2.tar.gz", hash = "sha256:e9fef6fcdf122029f9feb7dccfeb85eb9650e6aabc72d2a65b36558e9c590edd", size = 377722, upload-time = "2026-03-12T14:45:46.101Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/f1/de/0d6bd5664ba2e711cabdde11ccb41ddcdd866c531e40900af3601bd7b8c6/fla_core-0.4.1.tar.gz", hash = "sha256:38ab28966eeadc2141b29e87c2bf72a8a4851e00af9d25bbbc3596b1fb53450d", size = 319608, upload-time = "2025-12-24T18:07:37.669Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/ee/36/3c303f92bafea7c3f97d68bbb83d18cc42e30cd0bfb1b7cfe589360f11d6/fla_core-0.4.2-py3-none-any.whl", hash = "sha256:cba3db29380002da3cbfc0db94d6efac19aaf528900d19c05c2765e8f3cc485b", size = 510239, upload-time = "2026-03-12T14:45:43.708Z" },
+ { url = "https://files.pythonhosted.org/packages/f6/43/945ef69eb48a14c30fd7323d3e0b560c821ae71e6d3ef979e06a901bc3b9/fla_core-0.4.1-py3-none-any.whl", hash = "sha256:93c6afe4c80fc7bc705fa8aeea6a46d2cf2d77383f9619a41863c7114c801bab", size = 437282, upload-time = "2025-12-24T18:07:34.41Z" },
]
[[package]]
@@ -1538,22 +1529,17 @@ wheels = [
[[package]]
name = "flash-linear-attention"
-version = "0.4.2"
+version = "0.4.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "fla-core" },
{ name = "transformers" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/d1/cb/46cc27a829a10b308927c5dbc99176906a021bb0770253699e93f3cd81a0/flash_linear_attention-0.4.2.tar.gz", hash = "sha256:f97c01ebe7cf390323af07dd3fb65ade07da16724339bf70c78607bc0c007c34", size = 148464, upload-time = "2026-03-12T14:45:46.945Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/46/83/7d8ec7ffb5229080b1c9b772338ff588cbd63282ac355ede2a12a6e174a8/flash_linear_attention-0.4.1.tar.gz", hash = "sha256:127ee7273ed15ac17f72bcf4c75e1051719d8fbe0a2d1d047e59406f36d81ee2", size = 158280, upload-time = "2025-12-24T18:07:38.812Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/60/ee/a3cba17965482b35c4990af90bad108e82c32edcb59911c37f318b5f4198/flash_linear_attention-0.4.2-py3-none-any.whl", hash = "sha256:c08be006ce4dbe1be81f54938ee8e6fc7968cfba397c8d06c7669e97b8c44c0d", size = 284661, upload-time = "2026-03-12T14:45:44.905Z" },
+ { url = "https://files.pythonhosted.org/packages/63/d5/6327559a9d5b9243b10c3984f1bcef256ed2ad06d105a3bb8f7b2979659c/flash_linear_attention-0.4.1-py3-none-any.whl", hash = "sha256:d18bdfe9d1f4b424676444eac9d50fb8433b70e5d4e0e0878b20bcbcdbea57ce", size = 287415, upload-time = "2025-12-24T18:07:35.815Z" },
]
-[[package]]
-name = "flash-mla"
-version = "1.0.0+9edee0c"
-source = { git = "https://github.com/deepseek-ai/FlashMLA?rev=9edee0c022cd0938148a18e334203b0aab43aa19#9edee0c022cd0938148a18e334203b0aab43aa19" }
-
[[package]]
name = "flashinfer-python"
version = "0.5.3"
@@ -1564,7 +1550,7 @@ dependencies = [
{ name = "einops" },
{ name = "ninja" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
{ name = "nvidia-cudnn-frontend" },
{ name = "nvidia-cutlass-dsl" },
{ name = "nvidia-ml-py" },
@@ -1734,11 +1720,31 @@ wheels = [
[[package]]
name = "fsspec"
-version = "2026.2.0"
+version = "2025.10.0"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/51/7c/f60c259dcbf4f0c47cc4ddb8f7720d2dcdc8888c8e5ad84c73ea4531cc5b/fsspec-2026.2.0.tar.gz", hash = "sha256:6544e34b16869f5aacd5b90bdf1a71acb37792ea3ddf6125ee69a22a53fb8bff", size = 313441, upload-time = "2026-02-05T21:50:53.743Z" }
+resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
+ "python_full_version >= '3.14' and sys_platform == 'win32'",
+ "python_full_version >= '3.14' and sys_platform == 'emscripten'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform == 'win32'",
+ "python_full_version == '3.12.*' and sys_platform == 'win32'",
+ "python_full_version == '3.13.*' and sys_platform == 'emscripten'",
+ "python_full_version == '3.12.*' and sys_platform == 'emscripten'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
+ "python_full_version == '3.11.*' and sys_platform == 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'emscripten'",
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
+]
+sdist = { url = "https://files.pythonhosted.org/packages/24/7f/2747c0d332b9acfa75dc84447a066fdf812b5a6b8d30472b74d309bfe8cb/fsspec-2025.10.0.tar.gz", hash = "sha256:b6789427626f068f9a83ca4e8a3cc050850b6c0f71f99ddb4f542b8266a26a59", size = 309285, upload-time = "2025-10-30T14:58:44.036Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/e6/ab/fb21f4c939bb440104cc2b396d3be1d9b7a9fd3c6c2a53d98c45b3d7c954/fsspec-2026.2.0-py3-none-any.whl", hash = "sha256:98de475b5cb3bd66bedd5c4679e87b4fdfe1a3bf4d707b151b3c07e58c9a2437", size = 202505, upload-time = "2026-02-05T21:50:51.819Z" },
+ { url = "https://files.pythonhosted.org/packages/eb/02/a6b21098b1d5d6249b7c5ab69dde30108a71e4e819d4a9778f1de1d5b70d/fsspec-2025.10.0-py3-none-any.whl", hash = "sha256:7c7712353ae7d875407f97715f0e1ffcc21e33d5b24556cb1e090ae9409ec61d", size = 200966, upload-time = "2025-10-30T14:58:42.53Z" },
]
[package.optional-dependencies]
@@ -1746,6 +1752,18 @@ http = [
{ name = "aiohttp" },
]
+[[package]]
+name = "fsspec"
+version = "2026.2.0"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+]
+sdist = { url = "https://files.pythonhosted.org/packages/51/7c/f60c259dcbf4f0c47cc4ddb8f7720d2dcdc8888c8e5ad84c73ea4531cc5b/fsspec-2026.2.0.tar.gz", hash = "sha256:6544e34b16869f5aacd5b90bdf1a71acb37792ea3ddf6125ee69a22a53fb8bff", size = 313441, upload-time = "2026-02-05T21:50:53.743Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e6/ab/fb21f4c939bb440104cc2b396d3be1d9b7a9fd3c6c2a53d98c45b3d7c954/fsspec-2026.2.0-py3-none-any.whl", hash = "sha256:98de475b5cb3bd66bedd5c4679e87b4fdfe1a3bf4d707b151b3c07e58c9a2437", size = 202505, upload-time = "2026-02-05T21:50:51.819Z" },
+]
+
[[package]]
name = "gitdb"
version = "4.0.12"
@@ -1788,27 +1806,28 @@ wheels = [
[[package]]
name = "google-auth"
-version = "2.49.1"
+version = "2.48.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cryptography" },
{ name = "pyasn1-modules" },
+ { name = "rsa" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/ea/80/6a696a07d3d3b0a92488933532f03dbefa4a24ab80fb231395b9a2a1be77/google_auth-2.49.1.tar.gz", hash = "sha256:16d40da1c3c5a0533f57d268fe72e0ebb0ae1cc3b567024122651c045d879b64", size = 333825, upload-time = "2026-03-12T19:30:58.135Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/0c/41/242044323fbd746615884b1c16639749e73665b718209946ebad7ba8a813/google_auth-2.48.0.tar.gz", hash = "sha256:4f7e706b0cd3208a3d940a19a822c37a476ddba5450156c3e6624a71f7c841ce", size = 326522, upload-time = "2026-01-26T19:22:47.157Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/e9/eb/c6c2478d8a8d633460be40e2a8a6f8f429171997a35a96f81d3b680dec83/google_auth-2.49.1-py3-none-any.whl", hash = "sha256:195ebe3dca18eddd1b3db5edc5189b76c13e96f29e73043b923ebcf3f1a860f7", size = 240737, upload-time = "2026-03-12T19:30:53.159Z" },
+ { url = "https://files.pythonhosted.org/packages/83/1d/d6466de3a5249d35e832a52834115ca9d1d0de6abc22065f049707516d47/google_auth-2.48.0-py3-none-any.whl", hash = "sha256:2e2a537873d449434252a9632c28bfc268b0adb1e53f9fb62afc5333a975903f", size = 236499, upload-time = "2026-01-26T19:22:45.099Z" },
]
[[package]]
name = "googleapis-common-protos"
-version = "1.73.0"
+version = "1.72.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "protobuf" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/99/96/a0205167fa0154f4a542fd6925bdc63d039d88dab3588b875078107e6f06/googleapis_common_protos-1.73.0.tar.gz", hash = "sha256:778d07cd4fbeff84c6f7c72102f0daf98fa2bfd3fa8bea426edc545588da0b5a", size = 147323, upload-time = "2026-03-06T21:53:09.727Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/e5/7b/adfd75544c415c487b33061fe7ae526165241c1ea133f9a9125a56b39fd8/googleapis_common_protos-1.72.0.tar.gz", hash = "sha256:e55a601c1b32b52d7a3e65f43563e2aa61bcd737998ee672ac9b951cd49319f5", size = 147433, upload-time = "2025-11-06T18:29:24.087Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/69/28/23eea8acd65972bbfe295ce3666b28ac510dfcb115fac089d3edb0feb00a/googleapis_common_protos-1.73.0-py3-none-any.whl", hash = "sha256:dfdaaa2e860f242046be561e6d6cb5c5f1541ae02cfbcb034371aadb2942b4e8", size = 297578, upload-time = "2026-03-06T21:52:33.933Z" },
+ { url = "https://files.pythonhosted.org/packages/c4/ab/09169d5a4612a5f92490806649ac8d41e3ec9129c636754575b3553f4ea4/googleapis_common_protos-1.72.0-py3-none-any.whl", hash = "sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038", size = 297515, upload-time = "2025-11-06T18:29:13.14Z" },
]
[[package]]
@@ -1822,63 +1841,63 @@ wheels = [
[[package]]
name = "grpcio"
-version = "1.78.0"
+version = "1.78.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "typing-extensions" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/06/8a/3d098f35c143a89520e568e6539cc098fcd294495910e359889ce8741c84/grpcio-1.78.0.tar.gz", hash = "sha256:7382b95189546f375c174f53a5fa873cef91c4b8005faa05cc5b3beea9c4f1c5", size = 12852416, upload-time = "2026-02-06T09:57:18.093Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/5a/a8/690a085b4d1fe066130de97a87de32c45062cf2ecd218df9675add895550/grpcio-1.78.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:7cc47943d524ee0096f973e1081cb8f4f17a4615f2116882a5f1416e4cfe92b5", size = 5946986, upload-time = "2026-02-06T09:54:34.043Z" },
- { url = "https://files.pythonhosted.org/packages/c7/1b/e5213c5c0ced9d2d92778d30529ad5bb2dcfb6c48c4e2d01b1f302d33d64/grpcio-1.78.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:c3f293fdc675ccba4db5a561048cca627b5e7bd1c8a6973ffedabe7d116e22e2", size = 11816533, upload-time = "2026-02-06T09:54:37.04Z" },
- { url = "https://files.pythonhosted.org/packages/18/37/1ba32dccf0a324cc5ace744c44331e300b000a924bf14840f948c559ede7/grpcio-1.78.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:10a9a644b5dd5aec3b82b5b0b90d41c0fa94c85ef42cb42cf78a23291ddb5e7d", size = 6519964, upload-time = "2026-02-06T09:54:40.268Z" },
- { url = "https://files.pythonhosted.org/packages/ed/f5/c0e178721b818072f2e8b6fde13faaba942406c634009caf065121ce246b/grpcio-1.78.0-cp310-cp310-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4c5533d03a6cbd7f56acfc9cfb44ea64f63d29091e40e44010d34178d392d7eb", size = 7198058, upload-time = "2026-02-06T09:54:42.389Z" },
- { url = "https://files.pythonhosted.org/packages/5b/b2/40d43c91ae9cd667edc960135f9f08e58faa1576dc95af29f66ec912985f/grpcio-1.78.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ff870aebe9a93a85283837801d35cd5f8814fe2ad01e606861a7fb47c762a2b7", size = 6727212, upload-time = "2026-02-06T09:54:44.91Z" },
- { url = "https://files.pythonhosted.org/packages/ed/88/9da42eed498f0efcfcd9156e48ae63c0cde3bea398a16c99fb5198c885b6/grpcio-1.78.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:391e93548644e6b2726f1bb84ed60048d4bcc424ce5e4af0843d28ca0b754fec", size = 7300845, upload-time = "2026-02-06T09:54:47.562Z" },
- { url = "https://files.pythonhosted.org/packages/23/3f/1c66b7b1b19a8828890e37868411a6e6925df5a9030bfa87ab318f34095d/grpcio-1.78.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:df2c8f3141f7cbd112a6ebbd760290b5849cda01884554f7c67acc14e7b1758a", size = 8284605, upload-time = "2026-02-06T09:54:50.475Z" },
- { url = "https://files.pythonhosted.org/packages/94/c4/ca1bd87394f7b033e88525384b4d1e269e8424ab441ea2fba1a0c5b50986/grpcio-1.78.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:bd8cb8026e5f5b50498a3c4f196f57f9db344dad829ffae16b82e4fdbaea2813", size = 7726672, upload-time = "2026-02-06T09:54:53.11Z" },
- { url = "https://files.pythonhosted.org/packages/41/09/f16e487d4cc65ccaf670f6ebdd1a17566b965c74fc3d93999d3b2821e052/grpcio-1.78.0-cp310-cp310-win32.whl", hash = "sha256:f8dff3d9777e5d2703a962ee5c286c239bf0ba173877cc68dc02c17d042e29de", size = 4076715, upload-time = "2026-02-06T09:54:55.549Z" },
- { url = "https://files.pythonhosted.org/packages/2a/32/4ce60d94e242725fd3bcc5673c04502c82a8e87b21ea411a63992dc39f8f/grpcio-1.78.0-cp310-cp310-win_amd64.whl", hash = "sha256:94f95cf5d532d0e717eed4fc1810e8e6eded04621342ec54c89a7c2f14b581bf", size = 4799157, upload-time = "2026-02-06T09:54:59.838Z" },
- { url = "https://files.pythonhosted.org/packages/86/c7/d0b780a29b0837bf4ca9580904dfb275c1fc321ded7897d620af7047ec57/grpcio-1.78.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:2777b783f6c13b92bd7b716667452c329eefd646bfb3f2e9dabea2e05dbd34f6", size = 5951525, upload-time = "2026-02-06T09:55:01.989Z" },
- { url = "https://files.pythonhosted.org/packages/c5/b1/96920bf2ee61df85a9503cb6f733fe711c0ff321a5a697d791b075673281/grpcio-1.78.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:9dca934f24c732750389ce49d638069c3892ad065df86cb465b3fa3012b70c9e", size = 11830418, upload-time = "2026-02-06T09:55:04.462Z" },
- { url = "https://files.pythonhosted.org/packages/83/0c/7c1528f098aeb75a97de2bae18c530f56959fb7ad6c882db45d9884d6edc/grpcio-1.78.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:459ab414b35f4496138d0ecd735fed26f1318af5e52cb1efbc82a09f0d5aa911", size = 6524477, upload-time = "2026-02-06T09:55:07.111Z" },
- { url = "https://files.pythonhosted.org/packages/8d/52/e7c1f3688f949058e19a011c4e0dec973da3d0ae5e033909677f967ae1f4/grpcio-1.78.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:082653eecbdf290e6e3e2c276ab2c54b9e7c299e07f4221872380312d8cf395e", size = 7198266, upload-time = "2026-02-06T09:55:10.016Z" },
- { url = "https://files.pythonhosted.org/packages/e5/61/8ac32517c1e856677282c34f2e7812d6c328fa02b8f4067ab80e77fdc9c9/grpcio-1.78.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:85f93781028ec63f383f6bc90db785a016319c561cc11151fbb7b34e0d012303", size = 6730552, upload-time = "2026-02-06T09:55:12.207Z" },
- { url = "https://files.pythonhosted.org/packages/bd/98/b8ee0158199250220734f620b12e4a345955ac7329cfd908d0bf0fda77f0/grpcio-1.78.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:f12857d24d98441af6a1d5c87442d624411db486f7ba12550b07788f74b67b04", size = 7304296, upload-time = "2026-02-06T09:55:15.044Z" },
- { url = "https://files.pythonhosted.org/packages/bd/0f/7b72762e0d8840b58032a56fdbd02b78fc645b9fa993d71abf04edbc54f4/grpcio-1.78.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5397fff416b79e4b284959642a4e95ac4b0f1ece82c9993658e0e477d40551ec", size = 8288298, upload-time = "2026-02-06T09:55:17.276Z" },
- { url = "https://files.pythonhosted.org/packages/24/ae/ae4ce56bc5bb5caa3a486d60f5f6083ac3469228faa734362487176c15c5/grpcio-1.78.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:fbe6e89c7ffb48518384068321621b2a69cab509f58e40e4399fdd378fa6d074", size = 7730953, upload-time = "2026-02-06T09:55:19.545Z" },
- { url = "https://files.pythonhosted.org/packages/b5/6e/8052e3a28eb6a820c372b2eb4b5e32d195c661e137d3eca94d534a4cfd8a/grpcio-1.78.0-cp311-cp311-win32.whl", hash = "sha256:6092beabe1966a3229f599d7088b38dfc8ffa1608b5b5cdda31e591e6500f856", size = 4076503, upload-time = "2026-02-06T09:55:21.521Z" },
- { url = "https://files.pythonhosted.org/packages/08/62/f22c98c5265dfad327251fa2f840b591b1df5f5e15d88b19c18c86965b27/grpcio-1.78.0-cp311-cp311-win_amd64.whl", hash = "sha256:1afa62af6e23f88629f2b29ec9e52ec7c65a7176c1e0a83292b93c76ca882558", size = 4799767, upload-time = "2026-02-06T09:55:24.107Z" },
- { url = "https://files.pythonhosted.org/packages/4e/f4/7384ed0178203d6074446b3c4f46c90a22ddf7ae0b3aee521627f54cfc2a/grpcio-1.78.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:f9ab915a267fc47c7e88c387a3a28325b58c898e23d4995f765728f4e3dedb97", size = 5913985, upload-time = "2026-02-06T09:55:26.832Z" },
- { url = "https://files.pythonhosted.org/packages/81/ed/be1caa25f06594463f685b3790b320f18aea49b33166f4141bfdc2bfb236/grpcio-1.78.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3f8904a8165ab21e07e58bf3e30a73f4dffc7a1e0dbc32d51c61b5360d26f43e", size = 11811853, upload-time = "2026-02-06T09:55:29.224Z" },
- { url = "https://files.pythonhosted.org/packages/24/a7/f06d151afc4e64b7e3cc3e872d331d011c279aaab02831e40a81c691fb65/grpcio-1.78.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:859b13906ce098c0b493af92142ad051bf64c7870fa58a123911c88606714996", size = 6475766, upload-time = "2026-02-06T09:55:31.825Z" },
- { url = "https://files.pythonhosted.org/packages/8a/a8/4482922da832ec0082d0f2cc3a10976d84a7424707f25780b82814aafc0a/grpcio-1.78.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:b2342d87af32790f934a79c3112641e7b27d63c261b8b4395350dad43eff1dc7", size = 7170027, upload-time = "2026-02-06T09:55:34.7Z" },
- { url = "https://files.pythonhosted.org/packages/54/bf/f4a3b9693e35d25b24b0b39fa46d7d8a3c439e0a3036c3451764678fec20/grpcio-1.78.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:12a771591ae40bc65ba67048fa52ef4f0e6db8279e595fd349f9dfddeef571f9", size = 6690766, upload-time = "2026-02-06T09:55:36.902Z" },
- { url = "https://files.pythonhosted.org/packages/c7/b9/521875265cc99fe5ad4c5a17010018085cae2810a928bf15ebe7d8bcd9cc/grpcio-1.78.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:185dea0d5260cbb2d224c507bf2a5444d5abbb1fa3594c1ed7e4c709d5eb8383", size = 7266161, upload-time = "2026-02-06T09:55:39.824Z" },
- { url = "https://files.pythonhosted.org/packages/05/86/296a82844fd40a4ad4a95f100b55044b4f817dece732bf686aea1a284147/grpcio-1.78.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:51b13f9aed9d59ee389ad666b8c2214cc87b5de258fa712f9ab05f922e3896c6", size = 8253303, upload-time = "2026-02-06T09:55:42.353Z" },
- { url = "https://files.pythonhosted.org/packages/f3/e4/ea3c0caf5468537f27ad5aab92b681ed7cc0ef5f8c9196d3fd42c8c2286b/grpcio-1.78.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fd5f135b1bd58ab088930b3c613455796dfa0393626a6972663ccdda5b4ac6ce", size = 7698222, upload-time = "2026-02-06T09:55:44.629Z" },
- { url = "https://files.pythonhosted.org/packages/d7/47/7f05f81e4bb6b831e93271fb12fd52ba7b319b5402cbc101d588f435df00/grpcio-1.78.0-cp312-cp312-win32.whl", hash = "sha256:94309f498bcc07e5a7d16089ab984d42ad96af1d94b5a4eb966a266d9fcabf68", size = 4066123, upload-time = "2026-02-06T09:55:47.644Z" },
- { url = "https://files.pythonhosted.org/packages/ad/e7/d6914822c88aa2974dbbd10903d801a28a19ce9cd8bad7e694cbbcf61528/grpcio-1.78.0-cp312-cp312-win_amd64.whl", hash = "sha256:9566fe4ababbb2610c39190791e5b829869351d14369603702e890ef3ad2d06e", size = 4797657, upload-time = "2026-02-06T09:55:49.86Z" },
- { url = "https://files.pythonhosted.org/packages/05/a9/8f75894993895f361ed8636cd9237f4ab39ef87fd30db17467235ed1c045/grpcio-1.78.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:ce3a90455492bf8bfa38e56fbbe1dbd4f872a3d8eeaf7337dc3b1c8aa28c271b", size = 5920143, upload-time = "2026-02-06T09:55:52.035Z" },
- { url = "https://files.pythonhosted.org/packages/55/06/0b78408e938ac424100100fd081189451b472236e8a3a1f6500390dc4954/grpcio-1.78.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:2bf5e2e163b356978b23652c4818ce4759d40f4712ee9ec5a83c4be6f8c23a3a", size = 11803926, upload-time = "2026-02-06T09:55:55.494Z" },
- { url = "https://files.pythonhosted.org/packages/88/93/b59fe7832ff6ae3c78b813ea43dac60e295fa03606d14d89d2e0ec29f4f3/grpcio-1.78.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8f2ac84905d12918e4e55a16da17939eb63e433dc11b677267c35568aa63fc84", size = 6478628, upload-time = "2026-02-06T09:55:58.533Z" },
- { url = "https://files.pythonhosted.org/packages/ed/df/e67e3734527f9926b7d9c0dde6cd998d1d26850c3ed8eeec81297967ac67/grpcio-1.78.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:b58f37edab4a3881bc6c9bca52670610e0c9ca14e2ea3cf9debf185b870457fb", size = 7173574, upload-time = "2026-02-06T09:56:01.786Z" },
- { url = "https://files.pythonhosted.org/packages/a6/62/cc03fffb07bfba982a9ec097b164e8835546980aec25ecfa5f9c1a47e022/grpcio-1.78.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:735e38e176a88ce41840c21bb49098ab66177c64c82426e24e0082500cc68af5", size = 6692639, upload-time = "2026-02-06T09:56:04.529Z" },
- { url = "https://files.pythonhosted.org/packages/bf/9a/289c32e301b85bdb67d7ec68b752155e674ee3ba2173a1858f118e399ef3/grpcio-1.78.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2045397e63a7a0ee7957c25f7dbb36ddc110e0cfb418403d110c0a7a68a844e9", size = 7268838, upload-time = "2026-02-06T09:56:08.397Z" },
- { url = "https://files.pythonhosted.org/packages/0e/79/1be93f32add280461fa4773880196572563e9c8510861ac2da0ea0f892b6/grpcio-1.78.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:a9f136fbafe7ccf4ac7e8e0c28b31066e810be52d6e344ef954a3a70234e1702", size = 8251878, upload-time = "2026-02-06T09:56:10.914Z" },
- { url = "https://files.pythonhosted.org/packages/65/65/793f8e95296ab92e4164593674ae6291b204bb5f67f9d4a711489cd30ffa/grpcio-1.78.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:748b6138585379c737adc08aeffd21222abbda1a86a0dca2a39682feb9196c20", size = 7695412, upload-time = "2026-02-06T09:56:13.593Z" },
- { url = "https://files.pythonhosted.org/packages/1c/9f/1e233fe697ecc82845942c2822ed06bb522e70d6771c28d5528e4c50f6a4/grpcio-1.78.0-cp313-cp313-win32.whl", hash = "sha256:271c73e6e5676afe4fc52907686670c7cea22ab2310b76a59b678403ed40d670", size = 4064899, upload-time = "2026-02-06T09:56:15.601Z" },
- { url = "https://files.pythonhosted.org/packages/4d/27/d86b89e36de8a951501fb06a0f38df19853210f341d0b28f83f4aa0ffa08/grpcio-1.78.0-cp313-cp313-win_amd64.whl", hash = "sha256:f2d4e43ee362adfc05994ed479334d5a451ab7bc3f3fee1b796b8ca66895acb4", size = 4797393, upload-time = "2026-02-06T09:56:17.882Z" },
- { url = "https://files.pythonhosted.org/packages/29/f2/b56e43e3c968bfe822fa6ce5bca10d5c723aa40875b48791ce1029bb78c7/grpcio-1.78.0-cp314-cp314-linux_armv7l.whl", hash = "sha256:e87cbc002b6f440482b3519e36e1313eb5443e9e9e73d6a52d43bd2004fcfd8e", size = 5920591, upload-time = "2026-02-06T09:56:20.758Z" },
- { url = "https://files.pythonhosted.org/packages/5d/81/1f3b65bd30c334167bfa8b0d23300a44e2725ce39bba5b76a2460d85f745/grpcio-1.78.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:c41bc64626db62e72afec66b0c8a0da76491510015417c127bfc53b2fe6d7f7f", size = 11813685, upload-time = "2026-02-06T09:56:24.315Z" },
- { url = "https://files.pythonhosted.org/packages/0e/1c/bbe2f8216a5bd3036119c544d63c2e592bdf4a8ec6e4a1867592f4586b26/grpcio-1.78.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8dfffba826efcf366b1e3ccc37e67afe676f290e13a3b48d31a46739f80a8724", size = 6487803, upload-time = "2026-02-06T09:56:27.367Z" },
- { url = "https://files.pythonhosted.org/packages/16/5c/a6b2419723ea7ddce6308259a55e8e7593d88464ce8db9f4aa857aba96fa/grpcio-1.78.0-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:74be1268d1439eaaf552c698cdb11cd594f0c49295ae6bb72c34ee31abbe611b", size = 7173206, upload-time = "2026-02-06T09:56:29.876Z" },
- { url = "https://files.pythonhosted.org/packages/df/1e/b8801345629a415ea7e26c83d75eb5dbe91b07ffe5210cc517348a8d4218/grpcio-1.78.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:be63c88b32e6c0f1429f1398ca5c09bc64b0d80950c8bb7807d7d7fb36fb84c7", size = 6693826, upload-time = "2026-02-06T09:56:32.305Z" },
- { url = "https://files.pythonhosted.org/packages/34/84/0de28eac0377742679a510784f049738a80424b17287739fc47d63c2439e/grpcio-1.78.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:3c586ac70e855c721bda8f548d38c3ca66ac791dc49b66a8281a1f99db85e452", size = 7277897, upload-time = "2026-02-06T09:56:34.915Z" },
- { url = "https://files.pythonhosted.org/packages/ca/9c/ad8685cfe20559a9edb66f735afdcb2b7d3de69b13666fdfc542e1916ebd/grpcio-1.78.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:35eb275bf1751d2ffbd8f57cdbc46058e857cf3971041521b78b7db94bdaf127", size = 8252404, upload-time = "2026-02-06T09:56:37.553Z" },
- { url = "https://files.pythonhosted.org/packages/3c/05/33a7a4985586f27e1de4803887c417ec7ced145ebd069bc38a9607059e2b/grpcio-1.78.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:207db540302c884b8848036b80db352a832b99dfdf41db1eb554c2c2c7800f65", size = 7696837, upload-time = "2026-02-06T09:56:40.173Z" },
- { url = "https://files.pythonhosted.org/packages/73/77/7382241caf88729b106e49e7d18e3116216c778e6a7e833826eb96de22f7/grpcio-1.78.0-cp314-cp314-win32.whl", hash = "sha256:57bab6deef2f4f1ca76cc04565df38dc5713ae6c17de690721bdf30cb1e0545c", size = 4142439, upload-time = "2026-02-06T09:56:43.258Z" },
- { url = "https://files.pythonhosted.org/packages/48/b2/b096ccce418882fbfda4f7496f9357aaa9a5af1896a9a7f60d9f2b275a06/grpcio-1.78.0-cp314-cp314-win_amd64.whl", hash = "sha256:dce09d6116df20a96acfdbf85e4866258c3758180e8c49845d6ba8248b6d0bbb", size = 4929852, upload-time = "2026-02-06T09:56:45.885Z" },
+sdist = { url = "https://files.pythonhosted.org/packages/1f/de/de568532d9907552700f80dcec38219d8d298ad9e71f5e0a095abaf2761e/grpcio-1.78.1.tar.gz", hash = "sha256:27c625532d33ace45d57e775edf1982e183ff8641c72e4e91ef7ba667a149d72", size = 12835760, upload-time = "2026-02-20T01:16:10.869Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/44/30/0534b643dafd54824769d6260b89c71d518e4ef8b5ad16b84d1ae9272978/grpcio-1.78.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:4393bef64cf26dc07cd6f18eaa5170ae4eebaafd4418e7e3a59ca9526a6fa30b", size = 5947661, upload-time = "2026-02-20T01:12:34.922Z" },
+ { url = "https://files.pythonhosted.org/packages/4a/f8/f678566655ab822da0f713789555e7eddca7ef93da99f480c63de3aa94b4/grpcio-1.78.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:917047c19cd120b40aab9a4b8a22e9ce3562f4a1343c0d62b3cd2d5199da3d67", size = 11819948, upload-time = "2026-02-20T01:12:39.709Z" },
+ { url = "https://files.pythonhosted.org/packages/ff/0b/a4b4210d946055f4e5a8430f2802202ae8f831b4b00d36d55055c5cf4b6a/grpcio-1.78.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ff7de398bb3528d44d17e6913a7cfe639e3b15c65595a71155322df16978c5e1", size = 6519850, upload-time = "2026-02-20T01:12:42.715Z" },
+ { url = "https://files.pythonhosted.org/packages/ea/d9/a1e657a73000a71fa75ec7140ff3a8dc32eb3427560620e477c6a2735527/grpcio-1.78.1-cp310-cp310-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:15f6e636d1152667ddb4022b37534c161c8477274edb26a0b65b215dd0a81e97", size = 7198654, upload-time = "2026-02-20T01:12:46.164Z" },
+ { url = "https://files.pythonhosted.org/packages/aa/28/a61c5bdf53c1638e657bb5eebb93c789837820e1fdb965145f05eccc2994/grpcio-1.78.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:27b5cb669603efb7883a882275db88b6b5d6b6c9f0267d5846ba8699b7ace338", size = 6727238, upload-time = "2026-02-20T01:12:48.472Z" },
+ { url = "https://files.pythonhosted.org/packages/9d/3e/aa143d0687801986a29d85788c96089449f36651cd4e2a493737ae0c5be9/grpcio-1.78.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:86edb3966778fa05bfdb333688fde5dc9079f9e2a9aa6a5c42e9564b7656ba04", size = 7300960, upload-time = "2026-02-20T01:12:51.139Z" },
+ { url = "https://files.pythonhosted.org/packages/30/d3/53e0f26b46417f28d14b5951fc6a1eff79c08c8a339e967c0a19ec7cf9e9/grpcio-1.78.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:849cc62eb989bc3be5629d4f3acef79be0d0ff15622201ed251a86d17fef6494", size = 8285274, upload-time = "2026-02-20T01:12:53.315Z" },
+ { url = "https://files.pythonhosted.org/packages/29/d0/e0e9fd477ce86c07ed1ed1d5c34790f050b6d58bfde77b02b36e23f8b235/grpcio-1.78.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9a00992d6fafe19d648b9ccb4952200c50d8e36d0cce8cf026c56ed3fdc28465", size = 7726620, upload-time = "2026-02-20T01:12:56.498Z" },
+ { url = "https://files.pythonhosted.org/packages/5e/b5/e138a9f7810d196081b2e047c378ca12358c5906d79c42ddec41bb43d528/grpcio-1.78.1-cp310-cp310-win32.whl", hash = "sha256:f8759a1347f3b4f03d9a9d4ce8f9f31ad5e5d0144ba06ccfb1ffaeb0ba4c1e20", size = 4076778, upload-time = "2026-02-20T01:12:59.098Z" },
+ { url = "https://files.pythonhosted.org/packages/4e/95/9b02316b85731df0943a635ca6d02f155f673c4f17e60be0c4892a6eb051/grpcio-1.78.1-cp310-cp310-win_amd64.whl", hash = "sha256:e840405a3f1249509892be2399f668c59b9d492068a2cf326d661a8c79e5e747", size = 4798925, upload-time = "2026-02-20T01:13:03.186Z" },
+ { url = "https://files.pythonhosted.org/packages/bf/1e/ad774af3b2c84f49c6d8c4a7bea4c40f02268ea8380630c28777edda463b/grpcio-1.78.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:3a8aa79bc6e004394c0abefd4b034c14affda7b66480085d87f5fbadf43b593b", size = 5951132, upload-time = "2026-02-20T01:13:05.942Z" },
+ { url = "https://files.pythonhosted.org/packages/48/9d/ad3c284bedd88c545e20675d98ae904114d8517a71b0efc0901e9166628f/grpcio-1.78.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:8e1fcb419da5811deb47b7749b8049f7c62b993ba17822e3c7231e3e0ba65b79", size = 11831052, upload-time = "2026-02-20T01:13:09.604Z" },
+ { url = "https://files.pythonhosted.org/packages/6d/08/20d12865e47242d03c3ade9bb2127f5b4aded964f373284cfb357d47c5ac/grpcio-1.78.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b071dccac245c32cd6b1dd96b722283b855881ca0bf1c685cf843185f5d5d51e", size = 6524749, upload-time = "2026-02-20T01:13:21.692Z" },
+ { url = "https://files.pythonhosted.org/packages/c6/53/a8b72f52b253ec0cfdf88a13e9236a9d717c332b8aa5f0ba9e4699e94b55/grpcio-1.78.1-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:d6fb962947e4fe321eeef3be1ba5ba49d32dea9233c825fcbade8e858c14aaf4", size = 7198995, upload-time = "2026-02-20T01:13:24.275Z" },
+ { url = "https://files.pythonhosted.org/packages/13/3c/ac769c8ded1bcb26bb119fb472d3374b481b3cf059a0875db9fc77139c17/grpcio-1.78.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a6afd191551fd72e632367dfb083e33cd185bf9ead565f2476bba8ab864ae496", size = 6730770, upload-time = "2026-02-20T01:13:26.522Z" },
+ { url = "https://files.pythonhosted.org/packages/dc/c3/2275ef4cc5b942314321f77d66179be4097ff484e82ca34bf7baa5b1ddbc/grpcio-1.78.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b2acd83186305c0802dbc4d81ed0ec2f3e8658d7fde97cfba2f78d7372f05b89", size = 7305036, upload-time = "2026-02-20T01:13:30.923Z" },
+ { url = "https://files.pythonhosted.org/packages/91/cb/3c2aa99e12cbbfc72c2ed8aa328e6041709d607d668860380e6cd00ba17d/grpcio-1.78.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5380268ab8513445740f1f77bd966d13043d07e2793487e61fd5b5d0935071eb", size = 8288641, upload-time = "2026-02-20T01:13:39.42Z" },
+ { url = "https://files.pythonhosted.org/packages/0d/b2/21b89f492260ac645775d9973752ca873acfd0609d6998e9d3065a21ea2f/grpcio-1.78.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:389b77484959bdaad6a2b7dda44d7d1228381dd669a03f5660392aa0e9385b22", size = 7730967, upload-time = "2026-02-20T01:13:41.697Z" },
+ { url = "https://files.pythonhosted.org/packages/24/03/6b89eddf87fdffb8fa9d37375d44d3a798f4b8116ac363a5f7ca84caa327/grpcio-1.78.1-cp311-cp311-win32.whl", hash = "sha256:9dee66d142f4a8cca36b5b98a38f006419138c3c89e72071747f8fca415a6d8f", size = 4076680, upload-time = "2026-02-20T01:13:43.781Z" },
+ { url = "https://files.pythonhosted.org/packages/a7/a8/204460b1bc1dff9862e98f56a2d14be3c4171f929f8eaf8c4517174b4270/grpcio-1.78.1-cp311-cp311-win_amd64.whl", hash = "sha256:43b930cf4f9c4a2262bb3e5d5bc40df426a72538b4f98e46f158b7eb112d2d70", size = 4801074, upload-time = "2026-02-20T01:13:46.315Z" },
+ { url = "https://files.pythonhosted.org/packages/ab/ed/d2eb9d27fded1a76b2a80eb9aa8b12101da7e41ce2bac0ad3651e88a14ae/grpcio-1.78.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:41e4605c923e0e9a84a2718e4948a53a530172bfaf1a6d1ded16ef9c5849fca2", size = 5913389, upload-time = "2026-02-20T01:13:49.005Z" },
+ { url = "https://files.pythonhosted.org/packages/69/1b/40034e9ab010eeb3fa41ec61d8398c6dbf7062f3872c866b8f72700e2522/grpcio-1.78.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:39da1680d260c0c619c3b5fa2dc47480ca24d5704c7a548098bca7de7f5dd17f", size = 11811839, upload-time = "2026-02-20T01:13:51.839Z" },
+ { url = "https://files.pythonhosted.org/packages/b4/69/fe16ef2979ea62b8aceb3a3f1e7a8bbb8b717ae2a44b5899d5d426073273/grpcio-1.78.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b5d5881d72a09b8336a8f874784a8eeffacde44a7bc1a148bce5a0243a265ef0", size = 6475805, upload-time = "2026-02-20T01:13:55.423Z" },
+ { url = "https://files.pythonhosted.org/packages/5b/1e/069e0a9062167db18446917d7c00ae2e91029f96078a072bedc30aaaa8c3/grpcio-1.78.1-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:888ceb7821acd925b1c90f0cdceaed1386e69cfe25e496e0771f6c35a156132f", size = 7169955, upload-time = "2026-02-20T01:13:59.553Z" },
+ { url = "https://files.pythonhosted.org/packages/38/fc/44a57e2bb4a755e309ee4e9ed2b85c9af93450b6d3118de7e69410ee05fa/grpcio-1.78.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8942bdfc143b467c264b048862090c4ba9a0223c52ae28c9ae97754361372e42", size = 6690767, upload-time = "2026-02-20T01:14:02.31Z" },
+ { url = "https://files.pythonhosted.org/packages/b8/87/21e16345d4c75046d453916166bc72a3309a382c8e97381ec4b8c1a54729/grpcio-1.78.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:716a544969660ed609164aff27b2effd3ff84e54ac81aa4ce77b1607ca917d22", size = 7266846, upload-time = "2026-02-20T01:14:12.974Z" },
+ { url = "https://files.pythonhosted.org/packages/11/df/d6261983f9ca9ef4d69893765007a9a3211b91d9faf85a2591063df381c7/grpcio-1.78.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4d50329b081c223d444751076bb5b389d4f06c2b32d51b31a1e98172e6cecfb9", size = 8253522, upload-time = "2026-02-20T01:14:17.407Z" },
+ { url = "https://files.pythonhosted.org/packages/de/7c/4f96a0ff113c5d853a27084d7590cd53fdb05169b596ea9f5f27f17e021e/grpcio-1.78.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7e836778c13ff70edada16567e8da0c431e8818eaae85b80d11c1ba5782eccbb", size = 7698070, upload-time = "2026-02-20T01:14:20.032Z" },
+ { url = "https://files.pythonhosted.org/packages/17/3c/7b55c0b5af88fbeb3d0c13e25492d3ace41ac9dbd0f5f8f6c0fb613b6706/grpcio-1.78.1-cp312-cp312-win32.whl", hash = "sha256:07eb016ea7444a22bef465cce045512756956433f54450aeaa0b443b8563b9ca", size = 4066474, upload-time = "2026-02-20T01:14:22.602Z" },
+ { url = "https://files.pythonhosted.org/packages/5d/17/388c12d298901b0acf10b612b650692bfed60e541672b1d8965acbf2d722/grpcio-1.78.1-cp312-cp312-win_amd64.whl", hash = "sha256:02b82dcd2fa580f5e82b4cf62ecde1b3c7cc9ba27b946421200706a6e5acaf85", size = 4797537, upload-time = "2026-02-20T01:14:25.444Z" },
+ { url = "https://files.pythonhosted.org/packages/df/72/754754639cfd16ad04619e1435a518124b2d858e5752225376f9285d4c51/grpcio-1.78.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:2b7ad2981550ce999e25ce3f10c8863f718a352a2fd655068d29ea3fd37b4907", size = 5919437, upload-time = "2026-02-20T01:14:29.403Z" },
+ { url = "https://files.pythonhosted.org/packages/5c/84/6267d1266f8bc335d3a8b7ccf981be7de41e3ed8bd3a49e57e588212b437/grpcio-1.78.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:409bfe22220889b9906739910a0ee4c197a967c21b8dd14b4b06dd477f8819ce", size = 11803701, upload-time = "2026-02-20T01:14:32.624Z" },
+ { url = "https://files.pythonhosted.org/packages/f3/56/c9098e8b920a54261cd605bbb040de0cde1ca4406102db0aa2c0b11d1fb4/grpcio-1.78.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:34b6cb16f4b67eeb5206250dc5b4d5e8e3db939535e58efc330e4c61341554bd", size = 6479416, upload-time = "2026-02-20T01:14:35.926Z" },
+ { url = "https://files.pythonhosted.org/packages/86/cf/5d52024371ee62658b7ed72480200524087528844ec1b65265bbcd31c974/grpcio-1.78.1-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:39d21fd30d38a5afb93f0e2e71e2ec2bd894605fb75d41d5a40060c2f98f8d11", size = 7174087, upload-time = "2026-02-20T01:14:39.98Z" },
+ { url = "https://files.pythonhosted.org/packages/31/e6/5e59551afad4279e27335a6d60813b8aa3ae7b14fb62cea1d329a459c118/grpcio-1.78.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:09fbd4bcaadb6d8604ed1504b0bdf7ac18e48467e83a9d930a70a7fefa27e862", size = 6692881, upload-time = "2026-02-20T01:14:42.466Z" },
+ { url = "https://files.pythonhosted.org/packages/db/8f/940062de2d14013c02f51b079eb717964d67d46f5d44f22038975c9d9576/grpcio-1.78.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:db681513a1bdd879c0b24a5a6a70398da5eaaba0e077a306410dc6008426847a", size = 7269092, upload-time = "2026-02-20T01:14:45.826Z" },
+ { url = "https://files.pythonhosted.org/packages/09/87/9db657a4b5f3b15560ec591db950bc75a1a2f9e07832578d7e2b23d1a7bd/grpcio-1.78.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:f81816faa426da461e9a597a178832a351d6f1078102590a4b32c77d251b71eb", size = 8252037, upload-time = "2026-02-20T01:14:48.57Z" },
+ { url = "https://files.pythonhosted.org/packages/e2/37/b980e0265479ec65e26b6e300a39ceac33ecb3f762c2861d4bac990317cf/grpcio-1.78.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ffbb760df1cd49e0989f9826b2fd48930700db6846ac171eaff404f3cfbe5c28", size = 7695243, upload-time = "2026-02-20T01:14:51.376Z" },
+ { url = "https://files.pythonhosted.org/packages/98/46/5fc42c100ab702fa1ea41a75c890c563c3f96432b4a287d5a6369654f323/grpcio-1.78.1-cp313-cp313-win32.whl", hash = "sha256:1a56bf3ee99af5cf32d469de91bf5de79bdac2e18082b495fc1063ea33f4f2d0", size = 4065329, upload-time = "2026-02-20T01:14:53.952Z" },
+ { url = "https://files.pythonhosted.org/packages/b0/da/806d60bb6611dfc16cf463d982bd92bd8b6bd5f87dfac66b0a44dfe20995/grpcio-1.78.1-cp313-cp313-win_amd64.whl", hash = "sha256:8991c2add0d8505178ff6c3ae54bd9386279e712be82fa3733c54067aae9eda1", size = 4797637, upload-time = "2026-02-20T01:14:57.276Z" },
+ { url = "https://files.pythonhosted.org/packages/96/3a/2d2ec4d2ce2eb9d6a2b862630a0d9d4ff4239ecf1474ecff21442a78612a/grpcio-1.78.1-cp314-cp314-linux_armv7l.whl", hash = "sha256:d101fe49b1e0fb4a7aa36ed0c3821a0f67a5956ef572745452d2cd790d723a3f", size = 5920256, upload-time = "2026-02-20T01:15:00.23Z" },
+ { url = "https://files.pythonhosted.org/packages/9c/92/dccb7d087a1220ed358753945230c1ddeeed13684b954cb09db6758f1271/grpcio-1.78.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:5ce1855e8cfc217cdf6bcfe0cf046d7cf81ddcc3e6894d6cfd075f87a2d8f460", size = 11813749, upload-time = "2026-02-20T01:15:03.312Z" },
+ { url = "https://files.pythonhosted.org/packages/ef/47/c20e87f87986da9998f30f14776ce27e61f02482a3a030ffe265089342c6/grpcio-1.78.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd26048d066b51f39fe9206e2bcc2cea869a5e5b2d13c8d523f4179193047ebd", size = 6488739, upload-time = "2026-02-20T01:15:14.349Z" },
+ { url = "https://files.pythonhosted.org/packages/a6/c2/088bd96e255133d7d87c3eed0d598350d16cde1041bdbe2bb065967aaf91/grpcio-1.78.1-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4b8d7fda614cf2af0f73bbb042f3b7fee2ecd4aea69ec98dbd903590a1083529", size = 7173096, upload-time = "2026-02-20T01:15:17.687Z" },
+ { url = "https://files.pythonhosted.org/packages/60/ce/168db121073a03355ce3552b3b1f790b5ded62deffd7d98c5f642b9d3d81/grpcio-1.78.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:656a5bd142caeb8b1efe1fe0b4434ecc7781f44c97cfc7927f6608627cf178c0", size = 6693861, upload-time = "2026-02-20T01:15:20.911Z" },
+ { url = "https://files.pythonhosted.org/packages/ae/d0/90b30ec2d9425215dd56922d85a90babbe6ee7e8256ba77d866b9c0d3aba/grpcio-1.78.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:99550e344482e3c21950c034f74668fccf8a546d50c1ecb4f717543bbdc071ba", size = 7278083, upload-time = "2026-02-20T01:15:23.698Z" },
+ { url = "https://files.pythonhosted.org/packages/c1/fb/73f9ba0b082bcd385d46205095fd9c917754685885b28fce3741e9f54529/grpcio-1.78.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:8f27683ca68359bd3f0eb4925824d71e538f84338b3ae337ead2ae43977d7541", size = 8252546, upload-time = "2026-02-20T01:15:26.517Z" },
+ { url = "https://files.pythonhosted.org/packages/85/c5/6a89ea3cb5db6c3d9ed029b0396c49f64328c0cf5d2630ffeed25711920a/grpcio-1.78.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a40515b69ac50792f9b8ead260f194ba2bb3285375b6c40c7ff938f14c3df17d", size = 7696289, upload-time = "2026-02-20T01:15:29.718Z" },
+ { url = "https://files.pythonhosted.org/packages/3d/05/63a7495048499ef437b4933d32e59b7f737bd5368ad6fb2479e2bd83bf2c/grpcio-1.78.1-cp314-cp314-win32.whl", hash = "sha256:2c473b54ef1618f4fb85e82ff4994de18143b74efc088b91b5a935a3a45042ba", size = 4142186, upload-time = "2026-02-20T01:15:32.786Z" },
+ { url = "https://files.pythonhosted.org/packages/1c/ce/adfe7e5f701d503be7778291757452e3fab6b19acf51917c79f5d1cf7f8a/grpcio-1.78.1-cp314-cp314-win_amd64.whl", hash = "sha256:e2a6b33d1050dce2c6f563c5caf7f7cbeebf7fba8cde37ffe3803d50526900d1", size = 4932000, upload-time = "2026-02-20T01:15:36.127Z" },
]
[[package]]
@@ -1921,34 +1940,31 @@ wheels = [
[[package]]
name = "hf-xet"
-version = "1.4.2"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/09/08/23c84a26716382c89151b5b447b4beb19e3345f3a93d3b73009a71a57ad3/hf_xet-1.4.2.tar.gz", hash = "sha256:b7457b6b482d9e0743bd116363239b1fa904a5e65deede350fbc0c4ea67c71ea", size = 672357, upload-time = "2026-03-13T06:58:51.077Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/18/06/e8cf74c3c48e5485c7acc5a990d0d8516cdfb5fdf80f799174f1287cc1b5/hf_xet-1.4.2-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:ac8202ae1e664b2c15cdfc7298cbb25e80301ae596d602ef7870099a126fcad4", size = 3796125, upload-time = "2026-03-13T06:58:33.177Z" },
- { url = "https://files.pythonhosted.org/packages/66/d4/b73ebab01cbf60777323b7de9ef05550790451eb5172a220d6b9845385ec/hf_xet-1.4.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:6d2f8ee39fa9fba9af929f8c0d0482f8ee6e209179ad14a909b6ad78ffcb7c81", size = 3555985, upload-time = "2026-03-13T06:58:31.797Z" },
- { url = "https://files.pythonhosted.org/packages/ff/e7/ded6d1bd041c3f2bca9e913a0091adfe32371988e047dd3a68a2463c15a2/hf_xet-1.4.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4642a6cf249c09da8c1f87fe50b24b2a3450b235bf8adb55700b52f0ea6e2eb6", size = 4212085, upload-time = "2026-03-13T06:58:24.323Z" },
- { url = "https://files.pythonhosted.org/packages/97/c1/a0a44d1f98934f7bdf17f7a915b934f9fca44bb826628c553589900f6df8/hf_xet-1.4.2-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:769431385e746c92dc05492dde6f687d304584b89c33d79def8367ace06cb555", size = 3988266, upload-time = "2026-03-13T06:58:22.887Z" },
- { url = "https://files.pythonhosted.org/packages/7a/82/be713b439060e7d1f1d93543c8053d4ef2fe7e6922c5b31642eaa26f3c4b/hf_xet-1.4.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c9dd1c1bc4cc56168f81939b0e05b4c36dd2d28c13dc1364b17af89aa0082496", size = 4188513, upload-time = "2026-03-13T06:58:40.858Z" },
- { url = "https://files.pythonhosted.org/packages/21/a6/cbd4188b22abd80ebd0edbb2b3e87f2633e958983519980815fb8314eae5/hf_xet-1.4.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:fca58a2ae4e6f6755cc971ac6fcdf777ea9284d7e540e350bb000813b9a3008d", size = 4428287, upload-time = "2026-03-13T06:58:42.601Z" },
- { url = "https://files.pythonhosted.org/packages/b2/4e/84e45b25e2e3e903ed3db68d7eafa96dae9a1d1f6d0e7fc85120347a852f/hf_xet-1.4.2-cp313-cp313t-win_amd64.whl", hash = "sha256:163aab46854ccae0ab6a786f8edecbbfbaa38fcaa0184db6feceebf7000c93c0", size = 3665574, upload-time = "2026-03-13T06:58:53.881Z" },
- { url = "https://files.pythonhosted.org/packages/ee/71/c5ac2b9a7ae39c14e91973035286e73911c31980fe44e7b1d03730c00adc/hf_xet-1.4.2-cp313-cp313t-win_arm64.whl", hash = "sha256:09b138422ecbe50fd0c84d4da5ff537d27d487d3607183cd10e3e53f05188e82", size = 3528760, upload-time = "2026-03-13T06:58:52.187Z" },
- { url = "https://files.pythonhosted.org/packages/1e/0f/fcd2504015eab26358d8f0f232a1aed6b8d363a011adef83fe130bff88f7/hf_xet-1.4.2-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:949dcf88b484bb9d9276ca83f6599e4aa03d493c08fc168c124ad10b2e6f75d7", size = 3796493, upload-time = "2026-03-13T06:58:39.267Z" },
- { url = "https://files.pythonhosted.org/packages/82/56/19c25105ff81731ca6d55a188b5de2aa99d7a2644c7aa9de1810d5d3b726/hf_xet-1.4.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:41659966020d59eb9559c57de2cde8128b706a26a64c60f0531fa2318f409418", size = 3555797, upload-time = "2026-03-13T06:58:37.546Z" },
- { url = "https://files.pythonhosted.org/packages/bf/e3/8933c073186849b5e06762aa89847991d913d10a95d1603eb7f2c3834086/hf_xet-1.4.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5c588e21d80010119458dd5d02a69093f0d115d84e3467efe71ffb2c67c19146", size = 4212127, upload-time = "2026-03-13T06:58:30.539Z" },
- { url = "https://files.pythonhosted.org/packages/eb/01/f89ebba4e369b4ed699dcb60d3152753870996f41c6d22d3d7cac01310e1/hf_xet-1.4.2-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:a296744d771a8621ad1d50c098d7ab975d599800dae6d48528ba3944e5001ba0", size = 3987788, upload-time = "2026-03-13T06:58:29.139Z" },
- { url = "https://files.pythonhosted.org/packages/84/4d/8a53e5ffbc2cc33bbf755382ac1552c6d9af13f623ed125fe67cc3e6772f/hf_xet-1.4.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f563f7efe49588b7d0629d18d36f46d1658fe7e08dce3fa3d6526e1c98315e2d", size = 4188315, upload-time = "2026-03-13T06:58:48.017Z" },
- { url = "https://files.pythonhosted.org/packages/d1/b8/b7a1c1b5592254bd67050632ebbc1b42cc48588bf4757cb03c2ef87e704a/hf_xet-1.4.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5b2e0132c56d7ee1bf55bdb638c4b62e7106f6ac74f0b786fed499d5548c5570", size = 4428306, upload-time = "2026-03-13T06:58:49.502Z" },
- { url = "https://files.pythonhosted.org/packages/a0/0c/40779e45b20e11c7c5821a94135e0207080d6b3d76e7b78ccb413c6f839b/hf_xet-1.4.2-cp314-cp314t-win_amd64.whl", hash = "sha256:2f45c712c2fa1215713db10df6ac84b49d0e1c393465440e9cb1de73ecf7bbf6", size = 3665826, upload-time = "2026-03-13T06:58:59.88Z" },
- { url = "https://files.pythonhosted.org/packages/51/4c/e2688c8ad1760d7c30f7c429c79f35f825932581bc7c9ec811436d2f21a0/hf_xet-1.4.2-cp314-cp314t-win_arm64.whl", hash = "sha256:6d53df40616f7168abfccff100d232e9d460583b9d86fa4912c24845f192f2b8", size = 3529113, upload-time = "2026-03-13T06:58:58.491Z" },
- { url = "https://files.pythonhosted.org/packages/b4/86/b40b83a2ff03ef05c4478d2672b1fc2b9683ff870e2b25f4f3af240f2e7b/hf_xet-1.4.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:71f02d6e4cdd07f344f6844845d78518cc7186bd2bc52d37c3b73dc26a3b0bc5", size = 3800339, upload-time = "2026-03-13T06:58:36.245Z" },
- { url = "https://files.pythonhosted.org/packages/64/2e/af4475c32b4378b0e92a587adb1aa3ec53e3450fd3e5fe0372a874531c00/hf_xet-1.4.2-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:e9b38d876e94d4bdcf650778d6ebbaa791dd28de08db9736c43faff06ede1b5a", size = 3559664, upload-time = "2026-03-13T06:58:34.787Z" },
- { url = "https://files.pythonhosted.org/packages/3c/4c/781267da3188db679e601de18112021a5cb16506fe86b246e22c5401a9c4/hf_xet-1.4.2-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:77e8c180b7ef12d8a96739a4e1e558847002afe9ea63b6f6358b2271a8bdda1c", size = 4217422, upload-time = "2026-03-13T06:58:27.472Z" },
- { url = "https://files.pythonhosted.org/packages/68/47/d6cf4a39ecf6c7705f887a46f6ef5c8455b44ad9eb0d391aa7e8a2ff7fea/hf_xet-1.4.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c3b3c6a882016b94b6c210957502ff7877802d0dbda8ad142c8595db8b944271", size = 3992847, upload-time = "2026-03-13T06:58:25.989Z" },
- { url = "https://files.pythonhosted.org/packages/2d/ef/e80815061abff54697239803948abc665c6b1d237102c174f4f7a9a5ffc5/hf_xet-1.4.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9d9a634cc929cfbaf2e1a50c0e532ae8c78fa98618426769480c58501e8c8ac2", size = 4193843, upload-time = "2026-03-13T06:58:44.59Z" },
- { url = "https://files.pythonhosted.org/packages/54/75/07f6aa680575d9646c4167db6407c41340cbe2357f5654c4e72a1b01ca14/hf_xet-1.4.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6b0932eb8b10317ea78b7da6bab172b17be03bbcd7809383d8d5abd6a2233e04", size = 4432751, upload-time = "2026-03-13T06:58:46.533Z" },
- { url = "https://files.pythonhosted.org/packages/cd/71/193eabd7e7d4b903c4aa983a215509c6114915a5a237525ec562baddb868/hf_xet-1.4.2-cp37-abi3-win_amd64.whl", hash = "sha256:ad185719fb2e8ac26f88c8100562dbf9dbdcc3d9d2add00faa94b5f106aea53f", size = 3671149, upload-time = "2026-03-13T06:58:57.07Z" },
- { url = "https://files.pythonhosted.org/packages/b4/7e/ccf239da366b37ba7f0b36095450efae4a64980bdc7ec2f51354205fdf39/hf_xet-1.4.2-cp37-abi3-win_arm64.whl", hash = "sha256:32c012286b581f783653e718c1862aea5b9eb140631685bb0c5e7012c8719a87", size = 3533426, upload-time = "2026-03-13T06:58:55.46Z" },
+version = "1.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/4f/3a/9aa61729228fb03e946409c51963f0cd2fd7c109f4ab93edc5f04a10be86/hf_xet-1.3.0.tar.gz", hash = "sha256:9c154ad63e17aca970987b2cf17dbd8a0c09bb18aeb246f637647a8058e4522b", size = 641390, upload-time = "2026-02-24T00:16:19.935Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/b7/18/16954a87cfdfdc04792f1ffc9a29c0a48253ab10ec0f4856f39c7f7bf7cd/hf_xet-1.3.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:95bdeab4747cb45f855601e39b9e86ae92b4a114978ada6e0401961fcc5d2958", size = 3759481, upload-time = "2026-02-24T00:16:03.387Z" },
+ { url = "https://files.pythonhosted.org/packages/d8/6f/a55752047e9b0e69517775531c14680331f00c9cd4dc07f5e9b7f7f68a12/hf_xet-1.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f99992583f27b139392601fe99e88df155dc4de7feba98ed27ce2d3e6b4a65bb", size = 3517927, upload-time = "2026-02-24T00:16:02.108Z" },
+ { url = "https://files.pythonhosted.org/packages/ef/71/a909dbf9c8b166aa3f15db2bcf5d8afbe9d53170922edde2b919cf0bc455/hf_xet-1.3.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:687a71fc6d2eaa79d864da3aa13e5d887e124d357f5f306bfff6c385eea9d990", size = 4174328, upload-time = "2026-02-24T00:15:55.056Z" },
+ { url = "https://files.pythonhosted.org/packages/21/cc/dec0d971bb5872345b8d64363a0b78ed6a147eea5b4281575ce5a8150f42/hf_xet-1.3.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:75d19813ed0e24525409bc22566282ae9bc93e5d764b185565e863dc28280a45", size = 3953184, upload-time = "2026-02-24T00:15:53.43Z" },
+ { url = "https://files.pythonhosted.org/packages/3d/d8/d4259146e7c7089dd3f22cd62676d665bcfbc27428a070abee8985e0ab33/hf_xet-1.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:078af43569c2e05233137a93a33d2293f95c272745eaf030a9bb5f27bb0c9e9c", size = 4152800, upload-time = "2026-02-24T00:16:10.391Z" },
+ { url = "https://files.pythonhosted.org/packages/c9/0d/39d9d32e4cde689da618739197e264bba5a55d870377d5d32cdd5c03fad8/hf_xet-1.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:be8731e1620cc8549025c39ed3917c8fd125efaeae54ae679214a3d573e6c109", size = 4390499, upload-time = "2026-02-24T00:16:11.671Z" },
+ { url = "https://files.pythonhosted.org/packages/d9/27/5b9c323bf5513e8971702eeac43ba5cb554921e0f292ad52f20ed6028131/hf_xet-1.3.0-cp313-cp313t-win_amd64.whl", hash = "sha256:1552616c0e0fa728a4ffdffa106e91faa0fd4edb44868e79b464fad00b2758ee", size = 3634124, upload-time = "2026-02-24T00:16:20.964Z" },
+ { url = "https://files.pythonhosted.org/packages/85/32/76949adb65b7ca54c1e2b0519a98f7c88221b9091ae8780fc76d7d1bae70/hf_xet-1.3.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:a61496eccf412d7c51a5613c31a2051d357ddea6be53a0672c7644cf39bfefe9", size = 3759780, upload-time = "2026-02-24T00:16:09.037Z" },
+ { url = "https://files.pythonhosted.org/packages/63/c4/ad6fa712611711c129fa49eb17baaf0665647eb0abce32d94ccd44b69c6d/hf_xet-1.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:aba35218871cc438826076778958f7ab2a1f4f8d654e91c307073a815360558f", size = 3517640, upload-time = "2026-02-24T00:16:07.536Z" },
+ { url = "https://files.pythonhosted.org/packages/15/6b/b44659c5261cde6320a579d0acc949f19283a13d32fc9389fc49639f435e/hf_xet-1.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c444d8f657dedd7a72aa0ef0178fe01fe92b04b58014ee49e2b3b4985aea1529", size = 4174285, upload-time = "2026-02-24T00:16:00.848Z" },
+ { url = "https://files.pythonhosted.org/packages/61/cf/16ef1b366482fa4e71d1642b019158d7ac891bcb961477102ceadfe69436/hf_xet-1.3.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:6d1bbda7900d72bc591cd39a64e35ad07f89a24f90e3d7b7c692cb93a1926cde", size = 3952705, upload-time = "2026-02-24T00:15:59.355Z" },
+ { url = "https://files.pythonhosted.org/packages/d5/5a/d03453902ab9373715f50f3969979782a355df94329ea958ae78304ca06b/hf_xet-1.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:588f5df302e7dba5c3b60d4e5c683f95678526c29b9f64cbeb23e9f1889c6b83", size = 4152353, upload-time = "2026-02-24T00:16:15.857Z" },
+ { url = "https://files.pythonhosted.org/packages/ab/98/d3cd8cdd8d771bee9a03bd52faed6fa114a68a107a0e337aaf0b4c52bf0c/hf_xet-1.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:944ae454b296c42b18219c37f245c78d0e64a734057423e9309f4938faa85d7f", size = 4390010, upload-time = "2026-02-24T00:16:18.713Z" },
+ { url = "https://files.pythonhosted.org/packages/1f/10/3c58501d44d7a148d749ffa6046cbd14aa75a7ab07c9e7a984f86294cc53/hf_xet-1.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:34cdd5f10e61b7a1a7542672d20887c85debcfeb70a471ff1506f5a4c9441e42", size = 3634277, upload-time = "2026-02-24T00:16:23.718Z" },
+ { url = "https://files.pythonhosted.org/packages/a1/00/22d3d896466ded4c46ef6465b85fa434fa97d79f8f61cea322afde1d6157/hf_xet-1.3.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:df4447f69086dcc6418583315eda6ed09033ac1fbbc784fedcbbbdf67bea1680", size = 3761293, upload-time = "2026-02-24T00:16:06.012Z" },
+ { url = "https://files.pythonhosted.org/packages/97/fd/ebb0ea49e9bd9eb9f52844e417e0e6e9c8a59a1e84790691873fa910adc5/hf_xet-1.3.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:39f4fe714628adc2214ab4a67391182ee751bc4db581868cb3204900817758a8", size = 3523345, upload-time = "2026-02-24T00:16:04.615Z" },
+ { url = "https://files.pythonhosted.org/packages/8a/bb/72ceaaf619cad23d151a281d52e15456bae72f52c3795e820c0b64a5f637/hf_xet-1.3.0-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9b16e53ed6b5c8197cefb3fd12047a430b7034428effed463c03cec68de7e9a3", size = 4178623, upload-time = "2026-02-24T00:15:57.857Z" },
+ { url = "https://files.pythonhosted.org/packages/19/30/3280f4b5e407b442923a80ac0b2d96a65be7494457c55695e63f9a2b33dd/hf_xet-1.3.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:92051a1f73019489be77f6837671024ec785a3d1b888466b09d3a9ea15c4a1b5", size = 3958884, upload-time = "2026-02-24T00:15:56.326Z" },
+ { url = "https://files.pythonhosted.org/packages/8f/13/5174c6d52583e54a761c88570ca657d621ac684747613f47846debfd6d4d/hf_xet-1.3.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:943046b160e7804a85e68a659d2eee1a83ce3661f72d1294d3cc5ece0f45a355", size = 4158146, upload-time = "2026-02-24T00:16:13.158Z" },
+ { url = "https://files.pythonhosted.org/packages/12/13/ea8619021b119e19efdcaeec72f762b5be923cf79b5d4434f2cbbff39829/hf_xet-1.3.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:9b798a95d41b4f33b0b455c8aa76ff1fd26a587a4dd3bdec29f0a37c60b78a2f", size = 4395565, upload-time = "2026-02-24T00:16:14.574Z" },
+ { url = "https://files.pythonhosted.org/packages/64/cd/b81d922118a171bfbbecffd60a477e79188ab876260412fac47226a685bf/hf_xet-1.3.0-cp37-abi3-win_amd64.whl", hash = "sha256:227eee5b99d19b9f20c31d901a0c2373af610a24a34e6c2701072c9de48d6d95", size = 3637830, upload-time = "2026-02-24T00:16:22.474Z" },
]
[[package]]
@@ -2013,7 +2029,8 @@ version = "0.36.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "filelock" },
- { name = "fsspec" },
+ { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" },
+ { name = "fsspec", version = "2026.2.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
{ name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" },
{ name = "packaging" },
{ name = "pyyaml" },
@@ -2065,11 +2082,11 @@ wheels = [
[[package]]
name = "imagesize"
-version = "2.0.0"
+version = "1.4.1"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/6c/e6/7bf14eeb8f8b7251141944835abd42eb20a658d89084b7e1f3e5fe394090/imagesize-2.0.0.tar.gz", hash = "sha256:8e8358c4a05c304f1fccf7ff96f036e7243a189e9e42e90851993c558cfe9ee3", size = 1773045, upload-time = "2026-03-03T14:18:29.941Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/a7/84/62473fb57d61e31fef6e36d64a179c8781605429fd927b5dd608c997be31/imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a", size = 1280026, upload-time = "2022-07-01T12:21:05.687Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/5f/53/fb7122b71361a0d121b669dcf3d31244ef75badbbb724af388948de543e2/imagesize-2.0.0-py2.py3-none-any.whl", hash = "sha256:5667c5bbb57ab3f1fa4bc366f4fbc971db3d5ed011fd2715fd8001f782718d96", size = 9441, upload-time = "2026-03-03T14:18:27.892Z" },
+ { url = "https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b", size = 8769, upload-time = "2022-07-01T12:21:02.467Z" },
]
[[package]]
@@ -2418,7 +2435,7 @@ wheels = [
[[package]]
name = "mamba-ssm"
-version = "2.3.1"
+version = "2.3.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "einops" },
@@ -2429,7 +2446,7 @@ dependencies = [
{ name = "transformers" },
{ name = "triton", marker = "sys_platform == 'never'" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/34/67/ec89aa703da194a813e35d2ea2de8f74a7ce6991a120a29f3a0c5e30d4b9/mamba_ssm-2.3.1.tar.gz", hash = "sha256:4d529477ad94753962216d583fc8f1c127c717b7d7c875d6bbb9376366d0d761", size = 121707, upload-time = "2026-03-10T09:27:34.798Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/54/69/a87f06d9dba78c041adb81f2228e978aab179477c64f1a210c0fe0d63e8d/mamba_ssm-2.3.0.tar.gz", hash = "sha256:8294e12125f76021e4e190f4137e84a84935920eeda5d0037a6917524456b303", size = 121116, upload-time = "2026-01-12T17:07:22.152Z" }
[[package]]
name = "markdown"
@@ -2445,7 +2462,8 @@ name = "markdown-it-py"
version = "3.0.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
- "python_full_version < '3.11'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
]
dependencies = [
{ name = "mdurl", marker = "python_full_version < '3.11'" },
@@ -2460,18 +2478,22 @@ name = "markdown-it-py"
version = "4.0.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
"python_full_version >= '3.14' and sys_platform == 'win32'",
"python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'win32'",
"python_full_version == '3.12.*' and sys_platform == 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'emscripten'",
"python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
"python_full_version == '3.11.*' and sys_platform == 'win32'",
"python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
]
dependencies = [
{ name = "mdurl", marker = "python_full_version >= '3.11'" },
@@ -2602,7 +2624,7 @@ name = "megatron-core"
source = { editable = "." }
dependencies = [
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
{ name = "packaging" },
{ name = "torch", marker = "sys_platform == 'never' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
]
@@ -2611,9 +2633,9 @@ dependencies = [
dev = [
{ name = "av" },
{ name = "causal-conv1d" },
- { name = "datasets", version = "4.7.0", source = { registry = "https://pypi.org/simple" } },
+ { name = "datasets" },
{ name = "einops" },
- { name = "emerging-optimizers" },
+ { name = "emerging-optimizers", marker = "python_full_version >= '3.12'" },
{ name = "fastapi" },
{ name = "flash-linear-attention" },
{ name = "flashinfer-python" },
@@ -2621,16 +2643,16 @@ dev = [
{ name = "mamba-ssm" },
{ name = "megatron-energon", extra = ["av-decode"], marker = "extra == 'extra-13-megatron-core-dev'" },
{ name = "multi-storage-client" },
+ { name = "nv-grouped-gemm" },
{ name = "nvidia-modelopt", marker = "(sys_platform != 'darwin' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
{ name = "nvidia-resiliency-ext" },
{ name = "nvtx" },
{ name = "onnxscript" },
{ name = "openai", extra = ["aiohttp"], marker = "extra == 'extra-13-megatron-core-dev'" },
{ name = "opentelemetry-api", version = "1.33.1", source = { registry = "https://pypi.org/simple" } },
- { name = "orjson" },
{ name = "quart" },
{ name = "tensorstore", version = "0.1.78", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "tensorstore", version = "0.1.82", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "tensorstore", version = "0.1.81", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
{ name = "tqdm" },
{ name = "transformer-engine", marker = "extra == 'extra-13-megatron-core-dev'" },
{ name = "wget" },
@@ -2638,20 +2660,20 @@ dev = [
lts = [
{ name = "av" },
{ name = "causal-conv1d" },
- { name = "datasets", version = "2.2.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'win32' and extra == 'extra-13-megatron-core-lts') or (python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "datasets", version = "4.7.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.14' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'win32' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "datasets" },
{ name = "einops" },
- { name = "emerging-optimizers" },
+ { name = "emerging-optimizers", marker = "python_full_version >= '3.12'" },
{ name = "fastapi" },
{ name = "flashinfer-python" },
{ name = "mamba-ssm" },
{ name = "megatron-energon", extra = ["av-decode"], marker = "extra == 'extra-13-megatron-core-lts'" },
{ name = "multi-storage-client" },
+ { name = "nv-grouped-gemm" },
{ name = "nvtx" },
{ name = "onnxscript" },
{ name = "opentelemetry-api", version = "1.33.1", source = { registry = "https://pypi.org/simple" } },
{ name = "tensorstore", version = "0.1.78", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "tensorstore", version = "0.1.82", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "tensorstore", version = "0.1.81", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
{ name = "tqdm" },
{ name = "wget" },
]
@@ -2663,14 +2685,6 @@ mlm = [
{ name = "transformers" },
{ name = "wandb" },
]
-training = [
- { name = "accelerate" },
- { name = "flask-restful" },
- { name = "sentencepiece" },
- { name = "tiktoken" },
- { name = "transformers" },
- { name = "wandb" },
-]
[package.dev-dependencies]
build = [
@@ -2708,12 +2722,11 @@ linting = [
{ name = "ruff" },
]
no-pypi-wheels = [
- { name = "emerging-optimizers" },
- { name = "flash-mla" },
+ { name = "emerging-optimizers", marker = "python_full_version >= '3.12' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "fast-hadamard-transform" },
]
test = [
{ name = "coverage" },
- { name = "mock" },
{ name = "nemo-run" },
{ name = "nltk" },
{ name = "pydantic" },
@@ -2725,13 +2738,13 @@ test = [
{ name = "pytest-random-order" },
{ name = "pyyaml" },
{ name = "tensorboard" },
- { name = "wrapt" },
+ { name = "wrapt", version = "1.17.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" },
+ { name = "wrapt", version = "2.1.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
]
[package.metadata]
requires-dist = [
{ name = "accelerate", marker = "extra == 'mlm'" },
- { name = "accelerate", marker = "extra == 'training'" },
{ name = "av", marker = "extra == 'dev'" },
{ name = "av", marker = "extra == 'lts'" },
{ name = "causal-conv1d", marker = "extra == 'dev'", specifier = "~=1.5" },
@@ -2740,15 +2753,14 @@ requires-dist = [
{ name = "datasets", marker = "extra == 'lts'" },
{ name = "einops", marker = "extra == 'dev'", specifier = "~=0.8" },
{ name = "einops", marker = "extra == 'lts'", specifier = "~=0.8" },
- { name = "emerging-optimizers", marker = "extra == 'dev'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0" },
- { name = "emerging-optimizers", marker = "extra == 'lts'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0" },
+ { name = "emerging-optimizers", marker = "python_full_version >= '3.12' and extra == 'dev'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.2.0" },
+ { name = "emerging-optimizers", marker = "python_full_version >= '3.12' and extra == 'lts'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.2.0" },
{ name = "fastapi", marker = "extra == 'dev'", specifier = "~=0.50" },
{ name = "fastapi", marker = "extra == 'lts'", specifier = "~=0.50" },
{ name = "flash-linear-attention", marker = "extra == 'dev'", specifier = "~=0.4.0" },
{ name = "flashinfer-python", marker = "extra == 'dev'", specifier = "~=0.5.0" },
{ name = "flashinfer-python", marker = "extra == 'lts'", specifier = "~=0.5.0" },
{ name = "flask-restful", marker = "extra == 'mlm'" },
- { name = "flask-restful", marker = "extra == 'training'" },
{ name = "hypercorn", marker = "extra == 'dev'" },
{ name = "mamba-ssm", marker = "extra == 'dev'", specifier = "~=2.2" },
{ name = "mamba-ssm", marker = "extra == 'lts'", specifier = "~=2.2" },
@@ -2757,8 +2769,10 @@ requires-dist = [
{ name = "multi-storage-client", marker = "extra == 'dev'", specifier = "~=0.27" },
{ name = "multi-storage-client", marker = "extra == 'lts'", specifier = "~=0.27" },
{ name = "numpy" },
+ { name = "nv-grouped-gemm", marker = "extra == 'dev'", specifier = "~=1.1" },
+ { name = "nv-grouped-gemm", marker = "extra == 'lts'", specifier = "~=1.1" },
{ name = "nvidia-modelopt", extras = ["torch"], marker = "sys_platform != 'darwin' and extra == 'dev'" },
- { name = "nvidia-resiliency-ext", marker = "extra == 'dev'", git = "https://github.com/NVIDIA/nvidia-resiliency-ext.git?rev=v0.5.0" },
+ { name = "nvidia-resiliency-ext", marker = "extra == 'dev'" },
{ name = "nvtx", marker = "extra == 'dev'", specifier = "~=0.2" },
{ name = "nvtx", marker = "extra == 'lts'", specifier = "~=0.2" },
{ name = "onnxscript", marker = "extra == 'dev'" },
@@ -2766,27 +2780,22 @@ requires-dist = [
{ name = "openai", extras = ["aiohttp"], marker = "extra == 'dev'" },
{ name = "opentelemetry-api", marker = "extra == 'dev'", specifier = "~=1.33.1" },
{ name = "opentelemetry-api", marker = "extra == 'lts'", specifier = "~=1.33.1" },
- { name = "orjson", marker = "extra == 'dev'" },
{ name = "packaging", specifier = ">=24.2" },
{ name = "quart", marker = "extra == 'dev'" },
{ name = "sentencepiece", marker = "extra == 'mlm'" },
- { name = "sentencepiece", marker = "extra == 'training'" },
{ name = "tensorstore", marker = "extra == 'dev'", specifier = "~=0.1,!=0.1.46,!=0.1.72" },
{ name = "tensorstore", marker = "extra == 'lts'", specifier = "~=0.1,!=0.1.46,!=0.1.72" },
{ name = "tiktoken", marker = "extra == 'mlm'" },
- { name = "tiktoken", marker = "extra == 'training'" },
{ name = "torch", specifier = ">=2.6.0" },
{ name = "tqdm", marker = "extra == 'dev'" },
{ name = "tqdm", marker = "extra == 'lts'" },
- { name = "transformer-engine", extras = ["core-cu13", "pytorch"], marker = "extra == 'dev'", git = "https://github.com/NVIDIA/TransformerEngine.git?rev=287770466f0f4433052260a765db5ff7b8be1320" },
+ { name = "transformer-engine", extras = ["core-cu13", "pytorch"], marker = "extra == 'dev'", git = "https://github.com/NVIDIA/TransformerEngine.git?rev=5671fd3675906cda1ade26c24a65d3dedd88eb89" },
{ name = "transformers", marker = "extra == 'mlm'" },
- { name = "transformers", marker = "extra == 'training'" },
{ name = "wandb", marker = "extra == 'mlm'" },
- { name = "wandb", marker = "extra == 'training'" },
{ name = "wget", marker = "extra == 'dev'" },
{ name = "wget", marker = "extra == 'lts'" },
]
-provides-extras = ["training", "mlm", "dev", "lts"]
+provides-extras = ["mlm", "dev", "lts"]
[package.metadata.requires-dev]
build = [
@@ -2819,12 +2828,11 @@ linting = [
{ name = "ruff", specifier = "~=0.9.0" },
]
no-pypi-wheels = [
- { name = "emerging-optimizers", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0" },
- { name = "flash-mla", git = "https://github.com/deepseek-ai/FlashMLA?rev=9edee0c022cd0938148a18e334203b0aab43aa19" },
+ { name = "emerging-optimizers", marker = "python_full_version >= '3.12'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.2.0" },
+ { name = "fast-hadamard-transform", git = "https://github.com/Dao-AILab/fast-hadamard-transform.git?rev=f134af63deb2df17e1171a9ec1ea4a7d8604d5ca" },
]
test = [
{ name = "coverage" },
- { name = "mock" },
{ name = "nemo-run", git = "https://github.com/NVIDIA-NeMo/Run.git?rev=01a9a8ba360f7b2908728ad0516e0ad9d936966d" },
{ name = "nltk" },
{ name = "pydantic" },
@@ -2848,7 +2856,7 @@ dependencies = [
{ name = "click" },
{ name = "multi-storage-client" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
{ name = "pillow" },
{ name = "pyyaml" },
{ name = "s3fs" },
@@ -2877,7 +2885,7 @@ version = "0.5.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
]
sdist = { url = "https://files.pythonhosted.org/packages/0e/4a/c27b42ed9b1c7d13d9ba8b6905dece787d6259152f2309338aed29b2447b/ml_dtypes-0.5.4.tar.gz", hash = "sha256:8ab06a50fb9bf9666dd0fe5dfb4676fa2b0ac0f31ecff72a6c3af8e22c063453", size = 692314, upload-time = "2025-11-17T22:32:31.031Z" }
wheels = [
@@ -2917,15 +2925,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ad/3f/3d42e9a78fe5edf792a83c074b13b9b770092a4fbf3462872f4303135f09/ml_dtypes-0.5.4-cp314-cp314t-win_arm64.whl", hash = "sha256:11942cbf2cf92157db91e5022633c0d9474d4dfd813a909383bd23ce828a4b7d", size = 168825, upload-time = "2025-11-17T22:32:23.766Z" },
]
-[[package]]
-name = "mock"
-version = "5.2.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/07/8c/14c2ae915e5f9dca5a22edd68b35be94400719ccfa068a03e0fb63d0f6f6/mock-5.2.0.tar.gz", hash = "sha256:4e460e818629b4b173f32d08bf30d3af8123afbb8e04bb5707a1fd4799e503f0", size = 92796, upload-time = "2025-03-03T12:31:42.911Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/bd/d9/617e6af809bf3a1d468e0d58c3997b1dc219a9a9202e650d30c2fc85d481/mock-5.2.0-py3-none-any.whl", hash = "sha256:7ba87f72ca0e915175596069dbbcc7c75af7b5e9b9bc107ad6349ede0819982f", size = 31617, upload-time = "2025-03-03T12:31:41.518Z" },
-]
-
[[package]]
name = "mpmath"
version = "1.3.0"
@@ -2998,7 +2997,7 @@ wheels = [
[[package]]
name = "multi-storage-client"
-version = "0.44.0"
+version = "0.42.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "filelock" },
@@ -3016,18 +3015,18 @@ dependencies = [
{ name = "xattr" },
]
wheels = [
- { url = "https://files.pythonhosted.org/packages/26/2a/4041592b18f9a84353974bb89124991b27c9c65d3b47324a161f6edad11f/multi_storage_client-0.44.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:28e68b3ac8475fbc373707eb279e8f3bdfc297df836794ed986ddd6b6ae52a05", size = 9058162, upload-time = "2026-03-06T22:01:32.175Z" },
- { url = "https://files.pythonhosted.org/packages/69/89/b81846327d51d34635cba8eed047175706aca19bd53495ff1d97777d3ad0/multi_storage_client-0.44.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7120603e9b1936c1919493bfdbfc80084f08d2411b82f09e6384525338666451", size = 5400815, upload-time = "2026-03-06T22:03:06.163Z" },
- { url = "https://files.pythonhosted.org/packages/6c/a5/26640a2d8883719da42c5516962dfa5b71fbdd2e2e2ad3b4d489039233b0/multi_storage_client-0.44.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d9c163d1c5c870df1adbc993d58f884006fcd62727fb90b4a5c429373f62778", size = 5604930, upload-time = "2026-03-06T21:57:19.147Z" },
- { url = "https://files.pythonhosted.org/packages/80/54/1489bf146831c73dec8b620fba0d9651a8506f0e7513f2b1bafa5bf3dd30/multi_storage_client-0.44.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:fa900ac3b781707feba16c8da2562ec84ca4f1b7d99c7bce88b6311882e0cb22", size = 9057010, upload-time = "2026-03-06T22:01:55.899Z" },
- { url = "https://files.pythonhosted.org/packages/5c/22/fb4131638365a0d49393ebb2761608ed393e4b6fbeb661ff28cd82b3ce3a/multi_storage_client-0.44.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d06c99da4cbfb2f600d4ca6ae0f6daf9c6842f5f162e5fedb0f9cbd75c29d5f", size = 5401057, upload-time = "2026-03-06T22:01:09.262Z" },
- { url = "https://files.pythonhosted.org/packages/a2/26/6daf323ec45761b20ae1de6d9ef8b4013a17eae086b54ec225edb9e5bbf4/multi_storage_client-0.44.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81581731532dba0a40ea5ff0bb04e512b3ca458db69306902ffa1c047e02bfe0", size = 5606993, upload-time = "2026-03-06T22:02:42.929Z" },
- { url = "https://files.pythonhosted.org/packages/af/bd/6c66e4546ecda95da63b882eafa6d9659af032282f924b01e046ac057290/multi_storage_client-0.44.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:045a1973602b8eaacb1ab86d363c58d901d8768c6b0bb79c0088e417b3aabb0f", size = 9047082, upload-time = "2026-03-06T21:59:01.542Z" },
- { url = "https://files.pythonhosted.org/packages/61/5a/3fd40749a360145e218a7a7bd8db562f650954404f55b8a219843ec709a2/multi_storage_client-0.44.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f3d5456acfac890fd5291a4776ab57c38ffecceb160ede324836c2a02427d18", size = 5400355, upload-time = "2026-03-06T22:02:19.753Z" },
- { url = "https://files.pythonhosted.org/packages/41/c8/aa938f0602fd7d73dc541c1783d3ce2ddb5f9ea2121122f83e75f946c014/multi_storage_client-0.44.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a0a0f8951d722b719cb7d68cd36d639bad3664cf5ac823611b83056541e7b8f", size = 5610085, upload-time = "2026-03-06T21:57:42.001Z" },
- { url = "https://files.pythonhosted.org/packages/a5/cf/47ac58a9ce3913fc43feef4f2d327a07367c171c5d7f0f684eb5f2df9a6e/multi_storage_client-0.44.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f82f58b71dbcb07516fa1732ebd7df53cd62e65a9e50d694a1db6bb0cb5fbd29", size = 9044542, upload-time = "2026-03-06T21:58:37.73Z" },
- { url = "https://files.pythonhosted.org/packages/45/62/53b69c19554146345d33cb20cdae1516e2f2e5a35edee99a7c0ce58c7720/multi_storage_client-0.44.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2dd49feebdcd513a0f35701b8af03eb653f1b0efac7e8d686ee6c1bcdc40cbe5", size = 5399626, upload-time = "2026-03-06T21:58:05.093Z" },
- { url = "https://files.pythonhosted.org/packages/60/61/055349d323aa74e8b792d99d731660f52d0be7b16c2072486773b11b5fa7/multi_storage_client-0.44.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17612cc379be566d6abb81a6e3a4732ee73a30717a6e2d3432db071c7b4e3d13", size = 5609202, upload-time = "2026-03-06T21:56:55.451Z" },
+ { url = "https://files.pythonhosted.org/packages/ad/bb/6d7d9c53ce7b834cd7539ac579816c1482095127fc69a698750db21b1059/multi_storage_client-0.42.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:aa58acbea25b78dd902ce07b080d3feb6a80e51154c711449a0751f8cd37742e", size = 8805264, upload-time = "2026-02-06T20:58:49.246Z" },
+ { url = "https://files.pythonhosted.org/packages/fc/57/f9bec92d9a76467898a4ebdf501182151c6b5dc6d00a0a89a374b7f70ad3/multi_storage_client-0.42.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa278cc2bb7cdf80bc3407ced7d8a8b258801093b90e720059f6c4cdc5d68085", size = 5154902, upload-time = "2026-02-06T21:04:05.221Z" },
+ { url = "https://files.pythonhosted.org/packages/5e/2c/a417437abcc5c8ab0396fddfe9a158ea60e770e8d461ed0b2146a8efbf62/multi_storage_client-0.42.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32baa11cd3ce853f2072620134fe92e2ed3d682355fae2f86226c366717814ce", size = 5422721, upload-time = "2026-02-06T20:59:29.687Z" },
+ { url = "https://files.pythonhosted.org/packages/e1/90/1e69cb6d71418b38a9409b0e2564efe1e7c12e18e63e478591ae0317dbcc/multi_storage_client-0.42.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3db30610d6bb15a5c211af9d7b11c8a1a13265893c1a625d5aaadacdb61a9a8e", size = 8805275, upload-time = "2026-02-06T20:58:10.943Z" },
+ { url = "https://files.pythonhosted.org/packages/de/dd/a55dc9e60113f98af10075c3e33b97007bfbfd2e6f8bc6a1b2b1b43857c8/multi_storage_client-0.42.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8c2d491475eec5e80ad706eca7005d9bd17d30b29166e891c18695b42336493", size = 5155309, upload-time = "2026-02-06T20:56:22.528Z" },
+ { url = "https://files.pythonhosted.org/packages/b9/b6/648a1d6b4482634fbb0d5bc0ba156b42fafd4f364227f9203bc4ac70dbac/multi_storage_client-0.42.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:91a87e05e0e09b8fbd6804bb1ac85a28213d4371e91d06d9c35ad12b247f28ec", size = 5422770, upload-time = "2026-02-06T21:01:41.97Z" },
+ { url = "https://files.pythonhosted.org/packages/d4/5a/6af92f30d09c97a314594029c115da0c44d5fa14e772983d88ad8023d355/multi_storage_client-0.42.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:5c71c128b9f81cfbd59f1e2c2acfb2559658dfecde904496b7845901f0161430", size = 8798046, upload-time = "2026-02-06T21:02:32.674Z" },
+ { url = "https://files.pythonhosted.org/packages/c1/b2/e686bcbe754bfede1773153d928422b2c4b25453faf0e228cf9cadfa73e0/multi_storage_client-0.42.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afe72fcb3f44ddc23488ab65bbab8575181fe15f63d297074703a36f4d8f7cc9", size = 5155767, upload-time = "2026-02-06T21:01:02.151Z" },
+ { url = "https://files.pythonhosted.org/packages/05/44/2b7e0ec6fa68f208cb919c38df346cca37c910906f564a43f74731eb6cdb/multi_storage_client-0.42.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30410d59d1f93758640a15779af6379a961bfa0f9607809a2b869e8b750efac7", size = 5421800, upload-time = "2026-02-06T21:03:04.852Z" },
+ { url = "https://files.pythonhosted.org/packages/6b/ba/c342143f3820a1debd223149bb362246c983b6b6ef70ad245b0d9cfc8509/multi_storage_client-0.42.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:dde8cbbd066f2756f5fc7efe7f2be713a4b9212f28ddd49dc9d8008148e86e97", size = 8797502, upload-time = "2026-02-06T21:03:27.859Z" },
+ { url = "https://files.pythonhosted.org/packages/52/9c/43cfac582592df71723add55a40b7007c6c6412e4188e44a752ff5376a85/multi_storage_client-0.42.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2321ab65f464aeee17c91d3e999ab9df42ba7fb8e8e67ee764f3f76c9b11a2f", size = 5155687, upload-time = "2026-02-06T20:58:30.288Z" },
+ { url = "https://files.pythonhosted.org/packages/54/b6/c745f2bc357ba83373ad655fa7296a21e9022402cbab811dbd22aed2f87f/multi_storage_client-0.42.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c305df6d59e81f909c6a23d35fc3d0ceaac723238457f236a3f55261db0b5bae", size = 5422017, upload-time = "2026-02-06T20:56:45.672Z" },
]
[[package]]
@@ -3172,23 +3171,8 @@ wheels = [
name = "multiprocess"
version = "0.70.18"
source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
- "python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.13.*' and sys_platform == 'win32'",
- "python_full_version == '3.12.*' and sys_platform == 'win32'",
- "python_full_version == '3.13.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.11.*' and sys_platform == 'win32'",
- "python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version < '3.11'",
- "python_full_version >= '3.14' and sys_platform == 'win32'",
-]
dependencies = [
- { name = "dill", version = "0.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'win32'" },
+ { name = "dill", version = "0.4.0", source = { registry = "https://pypi.org/simple" } },
]
sdist = { url = "https://files.pythonhosted.org/packages/72/fd/2ae3826f5be24c6ed87266bc4e59c46ea5b059a103f3d7e7eb76a52aeecb/multiprocess-0.70.18.tar.gz", hash = "sha256:f9597128e6b3e67b23956da07cf3d2e5cba79e2f4e0fba8d7903636663ec6d0d", size = 1798503, upload-time = "2025-04-17T03:11:27.742Z" }
wheels = [
@@ -3206,32 +3190,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/6c/28/dd72947e59a6a8c856448a5e74da6201cb5502ddff644fbc790e4bd40b9a/multiprocess-0.70.18-py39-none-any.whl", hash = "sha256:e78ca805a72b1b810c690b6b4cc32579eba34f403094bbbae962b7b5bf9dfcb8", size = 133478, upload-time = "2025-04-17T03:11:26.253Z" },
]
-[[package]]
-name = "multiprocess"
-version = "0.70.19"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
- "python_full_version >= '3.14' and sys_platform == 'win32'",
-]
-dependencies = [
- { name = "dill", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14' and sys_platform == 'win32'" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/a2/f2/e783ac7f2aeeed14e9e12801f22529cc7e6b7ab80928d6dcce4e9f00922d/multiprocess-0.70.19.tar.gz", hash = "sha256:952021e0e6c55a4a9fe4cd787895b86e239a40e76802a789d6305398d3975897", size = 2079989, upload-time = "2026-01-19T06:47:39.744Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/8b/b6/10832f96b499690854e574360be342a282f5f7dba58eff791299ff6c0637/multiprocess-0.70.19-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:02e5c35d7d6cd2bdc89c1858867f7bde4012837411023a4696c148c1bdd7c80e", size = 135131, upload-time = "2026-01-19T06:47:20.479Z" },
- { url = "https://files.pythonhosted.org/packages/99/50/faef2d8106534b0dc4a0b772668a1a99682696ebf17d3c0f13f2ed6a656a/multiprocess-0.70.19-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:79576c02d1207ec405b00cabf2c643c36070800cca433860e14539df7818b2aa", size = 135131, upload-time = "2026-01-19T06:47:21.879Z" },
- { url = "https://files.pythonhosted.org/packages/94/b1/0b71d18b76bf423c2e8ee00b31db37d17297ab3b4db44e188692afdca628/multiprocess-0.70.19-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c6b6d78d43a03b68014ca1f0b7937d965393a670c5de7c29026beb2258f2f896", size = 135134, upload-time = "2026-01-19T06:47:23.262Z" },
- { url = "https://files.pythonhosted.org/packages/7e/aa/714635c727dbfc251139226fa4eaf1b07f00dc12d9cd2eb25f931adaf873/multiprocess-0.70.19-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1bbf1b69af1cf64cd05f65337d9215b88079ec819cd0ea7bac4dab84e162efe7", size = 144743, upload-time = "2026-01-19T06:47:24.562Z" },
- { url = "https://files.pythonhosted.org/packages/0f/e1/155f6abf5e6b5d9cef29b6d0167c180846157a4aca9b9bee1a217f67c959/multiprocess-0.70.19-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:5be9ec7f0c1c49a4f4a6fd20d5dda4aeabc2d39a50f4ad53720f1cd02b3a7c2e", size = 144738, upload-time = "2026-01-19T06:47:26.636Z" },
- { url = "https://files.pythonhosted.org/packages/af/cb/f421c2869d75750a4f32301cc20c4b63fab6376e9a75c8e5e655bdeb3d9b/multiprocess-0.70.19-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1c3dce098845a0db43b32a0b76a228ca059a668071cfeaa0f40c36c0b1585d45", size = 144741, upload-time = "2026-01-19T06:47:27.985Z" },
- { url = "https://files.pythonhosted.org/packages/e3/45/8004d1e6b9185c1a444d6b55ac5682acf9d98035e54386d967366035a03a/multiprocess-0.70.19-py310-none-any.whl", hash = "sha256:97404393419dcb2a8385910864eedf47a3cadf82c66345b44f036420eb0b5d87", size = 134948, upload-time = "2026-01-19T06:47:32.325Z" },
- { url = "https://files.pythonhosted.org/packages/86/c2/dec9722dc3474c164a0b6bcd9a7ed7da542c98af8cabce05374abab35edd/multiprocess-0.70.19-py311-none-any.whl", hash = "sha256:928851ae7973aea4ce0eaf330bbdafb2e01398a91518d5c8818802845564f45c", size = 144457, upload-time = "2026-01-19T06:47:33.711Z" },
- { url = "https://files.pythonhosted.org/packages/71/70/38998b950a97ea279e6bd657575d22d1a2047256caf707d9a10fbce4f065/multiprocess-0.70.19-py312-none-any.whl", hash = "sha256:3a56c0e85dd5025161bac5ce138dcac1e49174c7d8e74596537e729fd5c53c28", size = 150281, upload-time = "2026-01-19T06:47:35.037Z" },
- { url = "https://files.pythonhosted.org/packages/7f/74/d2c27e03cb84251dfe7249b8e82923643c6d48fa4883b9476b025e7dc7eb/multiprocess-0.70.19-py313-none-any.whl", hash = "sha256:8d5eb4ec5017ba2fab4e34a747c6d2c2b6fecfe9e7236e77988db91580ada952", size = 156414, upload-time = "2026-01-19T06:47:35.915Z" },
- { url = "https://files.pythonhosted.org/packages/a0/61/af9115673a5870fd885247e2f1b68c4f1197737da315b520a91c757a861a/multiprocess-0.70.19-py314-none-any.whl", hash = "sha256:e8cc7fbdff15c0613f0a1f1f8744bef961b0a164c0ca29bdff53e9d2d93c5e5f", size = 160318, upload-time = "2026-01-19T06:47:37.497Z" },
- { url = "https://files.pythonhosted.org/packages/7e/82/69e539c4c2027f1e1697e09aaa2449243085a0edf81ae2c6341e84d769b6/multiprocess-0.70.19-py39-none-any.whl", hash = "sha256:0d4b4397ed669d371c81dcd1ef33fd384a44d6c3de1bd0ca7ac06d837720d3c5", size = 133477, upload-time = "2026-01-19T06:47:38.619Z" },
-]
-
[[package]]
name = "mypy-extensions"
version = "1.1.0"
@@ -3246,7 +3204,8 @@ name = "myst-parser"
version = "4.0.1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
- "python_full_version < '3.11'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
]
dependencies = [
{ name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
@@ -3266,18 +3225,22 @@ name = "myst-parser"
version = "5.0.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
"python_full_version >= '3.14' and sys_platform == 'win32'",
"python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'win32'",
"python_full_version == '3.12.*' and sys_platform == 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'emscripten'",
"python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
"python_full_version == '3.11.*' and sys_platform == 'win32'",
"python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
]
dependencies = [
{ name = "docutils", version = "0.22.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
@@ -3320,7 +3283,8 @@ name = "networkx"
version = "3.4.2"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
- "python_full_version < '3.11'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
]
sdist = { url = "https://files.pythonhosted.org/packages/fd/1d/06475e1cd5264c0b870ea2cc6fdb3e37177c1e565c43f56ff17a10e3937f/networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1", size = 2151368, upload-time = "2024-10-21T12:39:38.695Z" }
wheels = [
@@ -3332,18 +3296,22 @@ name = "networkx"
version = "3.6.1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
"python_full_version >= '3.14' and sys_platform == 'win32'",
"python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'win32'",
"python_full_version == '3.12.*' and sys_platform == 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'emscripten'",
"python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
"python_full_version == '3.11.*' and sys_platform == 'win32'",
"python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
]
sdist = { url = "https://files.pythonhosted.org/packages/6a/51/63fe664f3908c97be9d2e4f1158eb633317598cfa6e1fc14af5383f17512/networkx-3.6.1.tar.gz", hash = "sha256:26b7c357accc0c8cde558ad486283728b65b6a95d85ee1cd66bafab4c8168509", size = 2517025, upload-time = "2025-12-08T17:02:39.908Z" }
wheels = [
@@ -3396,7 +3364,8 @@ name = "numpy"
version = "2.2.6"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
- "python_full_version < '3.11'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
]
sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" }
wheels = [
@@ -3458,96 +3427,112 @@ wheels = [
[[package]]
name = "numpy"
-version = "2.4.3"
+version = "2.4.2"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
"python_full_version >= '3.14' and sys_platform == 'win32'",
"python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'win32'",
"python_full_version == '3.12.*' and sys_platform == 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'emscripten'",
"python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
"python_full_version == '3.11.*' and sys_platform == 'win32'",
"python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
-]
-sdist = { url = "https://files.pythonhosted.org/packages/10/8b/c265f4823726ab832de836cdd184d0986dcf94480f81e8739692a7ac7af2/numpy-2.4.3.tar.gz", hash = "sha256:483a201202b73495f00dbc83796c6ae63137a9bdade074f7648b3e32613412dd", size = 20727743, upload-time = "2026-03-09T07:58:53.426Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/f9/51/5093a2df15c4dc19da3f79d1021e891f5dcf1d9d1db6ba38891d5590f3fe/numpy-2.4.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:33b3bf58ee84b172c067f56aeadc7ee9ab6de69c5e800ab5b10295d54c581adb", size = 16957183, upload-time = "2026-03-09T07:55:57.774Z" },
- { url = "https://files.pythonhosted.org/packages/b5/7c/c061f3de0630941073d2598dc271ac2f6cbcf5c83c74a5870fea07488333/numpy-2.4.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8ba7b51e71c05aa1f9bc3641463cd82308eab40ce0d5c7e1fd4038cbf9938147", size = 14968734, upload-time = "2026-03-09T07:56:00.494Z" },
- { url = "https://files.pythonhosted.org/packages/ef/27/d26c85cbcd86b26e4f125b0668e7a7c0542d19dd7d23ee12e87b550e95b5/numpy-2.4.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:a1988292870c7cb9d0ebb4cc96b4d447513a9644801de54606dc7aabf2b7d920", size = 5475288, upload-time = "2026-03-09T07:56:02.857Z" },
- { url = "https://files.pythonhosted.org/packages/2b/09/3c4abbc1dcd8010bf1a611d174c7aa689fc505585ec806111b4406f6f1b1/numpy-2.4.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:23b46bb6d8ecb68b58c09944483c135ae5f0e9b8d8858ece5e4ead783771d2a9", size = 6805253, upload-time = "2026-03-09T07:56:04.53Z" },
- { url = "https://files.pythonhosted.org/packages/21/bc/e7aa3f6817e40c3f517d407742337cbb8e6fc4b83ce0b55ab780c829243b/numpy-2.4.3-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a016db5c5dba78fa8fe9f5d80d6708f9c42ab087a739803c0ac83a43d686a470", size = 15969479, upload-time = "2026-03-09T07:56:06.638Z" },
- { url = "https://files.pythonhosted.org/packages/78/51/9f5d7a41f0b51649ddf2f2320595e15e122a40610b233d51928dd6c92353/numpy-2.4.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:715de7f82e192e8cae5a507a347d97ad17598f8e026152ca97233e3666daaa71", size = 16901035, upload-time = "2026-03-09T07:56:09.405Z" },
- { url = "https://files.pythonhosted.org/packages/64/6e/b221dd847d7181bc5ee4857bfb026182ef69499f9305eb1371cbb1aea626/numpy-2.4.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2ddb7919366ee468342b91dea2352824c25b55814a987847b6c52003a7c97f15", size = 17325657, upload-time = "2026-03-09T07:56:12.067Z" },
- { url = "https://files.pythonhosted.org/packages/eb/b8/8f3fd2da596e1063964b758b5e3c970aed1949a05200d7e3d46a9d46d643/numpy-2.4.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a315e5234d88067f2d97e1f2ef670a7569df445d55400f1e33d117418d008d52", size = 18635512, upload-time = "2026-03-09T07:56:14.629Z" },
- { url = "https://files.pythonhosted.org/packages/5c/24/2993b775c37e39d2f8ab4125b44337ab0b2ba106c100980b7c274a22bee7/numpy-2.4.3-cp311-cp311-win32.whl", hash = "sha256:2b3f8d2c4589b1a2028d2a770b0fc4d1f332fb5e01521f4de3199a896d158ddd", size = 6238100, upload-time = "2026-03-09T07:56:17.243Z" },
- { url = "https://files.pythonhosted.org/packages/76/1d/edccf27adedb754db7c4511d5eac8b83f004ae948fe2d3509e8b78097d4c/numpy-2.4.3-cp311-cp311-win_amd64.whl", hash = "sha256:77e76d932c49a75617c6d13464e41203cd410956614d0a0e999b25e9e8d27eec", size = 12609816, upload-time = "2026-03-09T07:56:19.089Z" },
- { url = "https://files.pythonhosted.org/packages/92/82/190b99153480076c8dce85f4cfe7d53ea84444145ffa54cb58dcd460d66b/numpy-2.4.3-cp311-cp311-win_arm64.whl", hash = "sha256:eb610595dd91560905c132c709412b512135a60f1851ccbd2c959e136431ff67", size = 10485757, upload-time = "2026-03-09T07:56:21.753Z" },
- { url = "https://files.pythonhosted.org/packages/a9/ed/6388632536f9788cea23a3a1b629f25b43eaacd7d7377e5d6bc7b9deb69b/numpy-2.4.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:61b0cbabbb6126c8df63b9a3a0c4b1f44ebca5e12ff6997b80fcf267fb3150ef", size = 16669628, upload-time = "2026-03-09T07:56:24.252Z" },
- { url = "https://files.pythonhosted.org/packages/74/1b/ee2abfc68e1ce728b2958b6ba831d65c62e1b13ce3017c13943f8f9b5b2e/numpy-2.4.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7395e69ff32526710748f92cd8c9849b361830968ea3e24a676f272653e8983e", size = 14696872, upload-time = "2026-03-09T07:56:26.991Z" },
- { url = "https://files.pythonhosted.org/packages/ba/d1/780400e915ff5638166f11ca9dc2c5815189f3d7cf6f8759a1685e586413/numpy-2.4.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:abdce0f71dcb4a00e4e77f3faf05e4616ceccfe72ccaa07f47ee79cda3b7b0f4", size = 5203489, upload-time = "2026-03-09T07:56:29.414Z" },
- { url = "https://files.pythonhosted.org/packages/0b/bb/baffa907e9da4cc34a6e556d6d90e032f6d7a75ea47968ea92b4858826c4/numpy-2.4.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:48da3a4ee1336454b07497ff7ec83903efa5505792c4e6d9bf83d99dc07a1e18", size = 6550814, upload-time = "2026-03-09T07:56:32.225Z" },
- { url = "https://files.pythonhosted.org/packages/7b/12/8c9f0c6c95f76aeb20fc4a699c33e9f827fa0d0f857747c73bb7b17af945/numpy-2.4.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:32e3bef222ad6b052280311d1d60db8e259e4947052c3ae7dd6817451fc8a4c5", size = 15666601, upload-time = "2026-03-09T07:56:34.461Z" },
- { url = "https://files.pythonhosted.org/packages/bd/79/cc665495e4d57d0aa6fbcc0aa57aa82671dfc78fbf95fe733ed86d98f52a/numpy-2.4.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e7dd01a46700b1967487141a66ac1a3cf0dd8ebf1f08db37d46389401512ca97", size = 16621358, upload-time = "2026-03-09T07:56:36.852Z" },
- { url = "https://files.pythonhosted.org/packages/a8/40/b4ecb7224af1065c3539f5ecfff879d090de09608ad1008f02c05c770cb3/numpy-2.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:76f0f283506c28b12bba319c0fab98217e9f9b54e6160e9c79e9f7348ba32e9c", size = 17016135, upload-time = "2026-03-09T07:56:39.337Z" },
- { url = "https://files.pythonhosted.org/packages/f7/b1/6a88e888052eed951afed7a142dcdf3b149a030ca59b4c71eef085858e43/numpy-2.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:737f630a337364665aba3b5a77e56a68cc42d350edd010c345d65a3efa3addcc", size = 18345816, upload-time = "2026-03-09T07:56:42.31Z" },
- { url = "https://files.pythonhosted.org/packages/f3/8f/103a60c5f8c3d7fc678c19cd7b2476110da689ccb80bc18050efbaeae183/numpy-2.4.3-cp312-cp312-win32.whl", hash = "sha256:26952e18d82a1dbbc2f008d402021baa8d6fc8e84347a2072a25e08b46d698b9", size = 5960132, upload-time = "2026-03-09T07:56:44.851Z" },
- { url = "https://files.pythonhosted.org/packages/d7/7c/f5ee1bf6ed888494978046a809df2882aad35d414b622893322df7286879/numpy-2.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:65f3c2455188f09678355f5cae1f959a06b778bc66d535da07bf2ef20cd319d5", size = 12316144, upload-time = "2026-03-09T07:56:47.057Z" },
- { url = "https://files.pythonhosted.org/packages/71/46/8d1cb3f7a00f2fb6394140e7e6623696e54c6318a9d9691bb4904672cf42/numpy-2.4.3-cp312-cp312-win_arm64.whl", hash = "sha256:2abad5c7fef172b3377502bde47892439bae394a71bc329f31df0fd829b41a9e", size = 10220364, upload-time = "2026-03-09T07:56:49.849Z" },
- { url = "https://files.pythonhosted.org/packages/b6/d0/1fe47a98ce0df229238b77611340aff92d52691bcbc10583303181abf7fc/numpy-2.4.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b346845443716c8e542d54112966383b448f4a3ba5c66409771b8c0889485dd3", size = 16665297, upload-time = "2026-03-09T07:56:52.296Z" },
- { url = "https://files.pythonhosted.org/packages/27/d9/4e7c3f0e68dfa91f21c6fb6cf839bc829ec920688b1ce7ec722b1a6202fb/numpy-2.4.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2629289168f4897a3c4e23dc98d6f1731f0fc0fe52fb9db19f974041e4cc12b9", size = 14691853, upload-time = "2026-03-09T07:56:54.992Z" },
- { url = "https://files.pythonhosted.org/packages/3a/66/bd096b13a87549683812b53ab211e6d413497f84e794fb3c39191948da97/numpy-2.4.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:bb2e3cf95854233799013779216c57e153c1ee67a0bf92138acca0e429aefaee", size = 5198435, upload-time = "2026-03-09T07:56:57.184Z" },
- { url = "https://files.pythonhosted.org/packages/a2/2f/687722910b5a5601de2135c891108f51dfc873d8e43c8ed9f4ebb440b4a2/numpy-2.4.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:7f3408ff897f8ab07a07fbe2823d7aee6ff644c097cc1f90382511fe982f647f", size = 6546347, upload-time = "2026-03-09T07:56:59.531Z" },
- { url = "https://files.pythonhosted.org/packages/bf/ec/7971c4e98d86c564750393fab8d7d83d0a9432a9d78bb8a163a6dc59967a/numpy-2.4.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:decb0eb8a53c3b009b0962378065589685d66b23467ef5dac16cbe818afde27f", size = 15664626, upload-time = "2026-03-09T07:57:01.385Z" },
- { url = "https://files.pythonhosted.org/packages/7e/eb/7daecbea84ec935b7fc732e18f532073064a3816f0932a40a17f3349185f/numpy-2.4.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5f51900414fc9204a0e0da158ba2ac52b75656e7dce7e77fb9f84bfa343b4cc", size = 16608916, upload-time = "2026-03-09T07:57:04.008Z" },
- { url = "https://files.pythonhosted.org/packages/df/58/2a2b4a817ffd7472dca4421d9f0776898b364154e30c95f42195041dc03b/numpy-2.4.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6bd06731541f89cdc01b261ba2c9e037f1543df7472517836b78dfb15bd6e476", size = 17015824, upload-time = "2026-03-09T07:57:06.347Z" },
- { url = "https://files.pythonhosted.org/packages/4a/ca/627a828d44e78a418c55f82dd4caea8ea4a8ef24e5144d9e71016e52fb40/numpy-2.4.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:22654fe6be0e5206f553a9250762c653d3698e46686eee53b399ab90da59bd92", size = 18334581, upload-time = "2026-03-09T07:57:09.114Z" },
- { url = "https://files.pythonhosted.org/packages/cd/c0/76f93962fc79955fcba30a429b62304332345f22d4daec1cb33653425643/numpy-2.4.3-cp313-cp313-win32.whl", hash = "sha256:d71e379452a2f670ccb689ec801b1218cd3983e253105d6e83780967e899d687", size = 5958618, upload-time = "2026-03-09T07:57:11.432Z" },
- { url = "https://files.pythonhosted.org/packages/b1/3c/88af0040119209b9b5cb59485fa48b76f372c73068dbf9254784b975ac53/numpy-2.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:0a60e17a14d640f49146cb38e3f105f571318db7826d9b6fef7e4dce758faecd", size = 12312824, upload-time = "2026-03-09T07:57:13.586Z" },
- { url = "https://files.pythonhosted.org/packages/58/ce/3d07743aced3d173f877c3ef6a454c2174ba42b584ab0b7e6d99374f51ed/numpy-2.4.3-cp313-cp313-win_arm64.whl", hash = "sha256:c9619741e9da2059cd9c3f206110b97583c7152c1dc9f8aafd4beb450ac1c89d", size = 10221218, upload-time = "2026-03-09T07:57:16.183Z" },
- { url = "https://files.pythonhosted.org/packages/62/09/d96b02a91d09e9d97862f4fc8bfebf5400f567d8eb1fe4b0cc4795679c15/numpy-2.4.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:7aa4e54f6469300ebca1d9eb80acd5253cdfa36f2c03d79a35883687da430875", size = 14819570, upload-time = "2026-03-09T07:57:18.564Z" },
- { url = "https://files.pythonhosted.org/packages/b5/ca/0b1aba3905fdfa3373d523b2b15b19029f4f3031c87f4066bd9d20ef6c6b/numpy-2.4.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d1b90d840b25874cf5cd20c219af10bac3667db3876d9a495609273ebe679070", size = 5326113, upload-time = "2026-03-09T07:57:21.052Z" },
- { url = "https://files.pythonhosted.org/packages/c0/63/406e0fd32fcaeb94180fd6a4c41e55736d676c54346b7efbce548b94a914/numpy-2.4.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:a749547700de0a20a6718293396ec237bb38218049cfce788e08fcb716e8cf73", size = 6646370, upload-time = "2026-03-09T07:57:22.804Z" },
- { url = "https://files.pythonhosted.org/packages/b6/d0/10f7dc157d4b37af92720a196be6f54f889e90dcd30dce9dc657ed92c257/numpy-2.4.3-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94f3c4a151a2e529adf49c1d54f0f57ff8f9b233ee4d44af623a81553ab86368", size = 15723499, upload-time = "2026-03-09T07:57:24.693Z" },
- { url = "https://files.pythonhosted.org/packages/66/f1/d1c2bf1161396629701bc284d958dc1efa3a5a542aab83cf11ee6eb4cba5/numpy-2.4.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22c31dc07025123aedf7f2db9e91783df13f1776dc52c6b22c620870dc0fab22", size = 16657164, upload-time = "2026-03-09T07:57:27.676Z" },
- { url = "https://files.pythonhosted.org/packages/1a/be/cca19230b740af199ac47331a21c71e7a3d0ba59661350483c1600d28c37/numpy-2.4.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:148d59127ac95979d6f07e4d460f934ebdd6eed641db9c0db6c73026f2b2101a", size = 17081544, upload-time = "2026-03-09T07:57:30.664Z" },
- { url = "https://files.pythonhosted.org/packages/b9/c5/9602b0cbb703a0936fb40f8a95407e8171935b15846de2f0776e08af04c7/numpy-2.4.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:a97cbf7e905c435865c2d939af3d93f99d18eaaa3cabe4256f4304fb51604349", size = 18380290, upload-time = "2026-03-09T07:57:33.763Z" },
- { url = "https://files.pythonhosted.org/packages/ed/81/9f24708953cd30be9ee36ec4778f4b112b45165812f2ada4cc5ea1c1f254/numpy-2.4.3-cp313-cp313t-win32.whl", hash = "sha256:be3b8487d725a77acccc9924f65fd8bce9af7fac8c9820df1049424a2115af6c", size = 6082814, upload-time = "2026-03-09T07:57:36.491Z" },
- { url = "https://files.pythonhosted.org/packages/e2/9e/52f6eaa13e1a799f0ab79066c17f7016a4a8ae0c1aefa58c82b4dab690b4/numpy-2.4.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1ec84fd7c8e652b0f4aaaf2e6e9cc8eaa9b1b80a537e06b2e3a2fb176eedcb26", size = 12452673, upload-time = "2026-03-09T07:57:38.281Z" },
- { url = "https://files.pythonhosted.org/packages/c4/04/b8cece6ead0b30c9fbd99bb835ad7ea0112ac5f39f069788c5558e3b1ab2/numpy-2.4.3-cp313-cp313t-win_arm64.whl", hash = "sha256:120df8c0a81ebbf5b9020c91439fccd85f5e018a927a39f624845be194a2be02", size = 10290907, upload-time = "2026-03-09T07:57:40.747Z" },
- { url = "https://files.pythonhosted.org/packages/70/ae/3936f79adebf8caf81bd7a599b90a561334a658be4dcc7b6329ebf4ee8de/numpy-2.4.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:5884ce5c7acfae1e4e1b6fde43797d10aa506074d25b531b4f54bde33c0c31d4", size = 16664563, upload-time = "2026-03-09T07:57:43.817Z" },
- { url = "https://files.pythonhosted.org/packages/9b/62/760f2b55866b496bb1fa7da2a6db076bef908110e568b02fcfc1422e2a3a/numpy-2.4.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:297837823f5bc572c5f9379b0c9f3a3365f08492cbdc33bcc3af174372ebb168", size = 14702161, upload-time = "2026-03-09T07:57:46.169Z" },
- { url = "https://files.pythonhosted.org/packages/32/af/a7a39464e2c0a21526fb4fb76e346fb172ebc92f6d1c7a07c2c139cc17b1/numpy-2.4.3-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:a111698b4a3f8dcbe54c64a7708f049355abd603e619013c346553c1fd4ca90b", size = 5208738, upload-time = "2026-03-09T07:57:48.506Z" },
- { url = "https://files.pythonhosted.org/packages/29/8c/2a0cf86a59558fa078d83805589c2de490f29ed4fb336c14313a161d358a/numpy-2.4.3-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:4bd4741a6a676770e0e97fe9ab2e51de01183df3dcbcec591d26d331a40de950", size = 6543618, upload-time = "2026-03-09T07:57:50.591Z" },
- { url = "https://files.pythonhosted.org/packages/aa/b8/612ce010c0728b1c363fa4ea3aa4c22fe1c5da1de008486f8c2f5cb92fae/numpy-2.4.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:54f29b877279d51e210e0c80709ee14ccbbad647810e8f3d375561c45ef613dd", size = 15680676, upload-time = "2026-03-09T07:57:52.34Z" },
- { url = "https://files.pythonhosted.org/packages/a9/7e/4f120ecc54ba26ddf3dc348eeb9eb063f421de65c05fc961941798feea18/numpy-2.4.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:679f2a834bae9020f81534671c56fd0cc76dd7e5182f57131478e23d0dc59e24", size = 16613492, upload-time = "2026-03-09T07:57:54.91Z" },
- { url = "https://files.pythonhosted.org/packages/2c/86/1b6020db73be330c4b45d5c6ee4295d59cfeef0e3ea323959d053e5a6909/numpy-2.4.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d84f0f881cb2225c2dfd7f78a10a5645d487a496c6668d6cc39f0f114164f3d0", size = 17031789, upload-time = "2026-03-09T07:57:57.641Z" },
- { url = "https://files.pythonhosted.org/packages/07/3a/3b90463bf41ebc21d1b7e06079f03070334374208c0f9a1f05e4ae8455e7/numpy-2.4.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d213c7e6e8d211888cc359bab7199670a00f5b82c0978b9d1c75baf1eddbeac0", size = 18339941, upload-time = "2026-03-09T07:58:00.577Z" },
- { url = "https://files.pythonhosted.org/packages/a8/74/6d736c4cd962259fd8bae9be27363eb4883a2f9069763747347544c2a487/numpy-2.4.3-cp314-cp314-win32.whl", hash = "sha256:52077feedeff7c76ed7c9f1a0428558e50825347b7545bbb8523da2cd55c547a", size = 6007503, upload-time = "2026-03-09T07:58:03.331Z" },
- { url = "https://files.pythonhosted.org/packages/48/39/c56ef87af669364356bb011922ef0734fc49dad51964568634c72a009488/numpy-2.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:0448e7f9caefb34b4b7dd2b77f21e8906e5d6f0365ad525f9f4f530b13df2afc", size = 12444915, upload-time = "2026-03-09T07:58:06.353Z" },
- { url = "https://files.pythonhosted.org/packages/9d/1f/ab8528e38d295fd349310807496fabb7cf9fe2e1f70b97bc20a483ea9d4a/numpy-2.4.3-cp314-cp314-win_arm64.whl", hash = "sha256:b44fd60341c4d9783039598efadd03617fa28d041fc37d22b62d08f2027fa0e7", size = 10494875, upload-time = "2026-03-09T07:58:08.734Z" },
- { url = "https://files.pythonhosted.org/packages/e6/ef/b7c35e4d5ef141b836658ab21a66d1a573e15b335b1d111d31f26c8ef80f/numpy-2.4.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0a195f4216be9305a73c0e91c9b026a35f2161237cf1c6de9b681637772ea657", size = 14822225, upload-time = "2026-03-09T07:58:11.034Z" },
- { url = "https://files.pythonhosted.org/packages/cd/8d/7730fa9278cf6648639946cc816e7cc89f0d891602584697923375f801ed/numpy-2.4.3-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:cd32fbacb9fd1bf041bf8e89e4576b6f00b895f06d00914820ae06a616bdfef7", size = 5328769, upload-time = "2026-03-09T07:58:13.67Z" },
- { url = "https://files.pythonhosted.org/packages/47/01/d2a137317c958b074d338807c1b6a383406cdf8b8e53b075d804cc3d211d/numpy-2.4.3-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:2e03c05abaee1f672e9d67bc858f300b5ccba1c21397211e8d77d98350972093", size = 6649461, upload-time = "2026-03-09T07:58:15.912Z" },
- { url = "https://files.pythonhosted.org/packages/5c/34/812ce12bc0f00272a4b0ec0d713cd237cb390666eb6206323d1cc9cedbb2/numpy-2.4.3-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d1ce23cce91fcea443320a9d0ece9b9305d4368875bab09538f7a5b4131938a", size = 15725809, upload-time = "2026-03-09T07:58:17.787Z" },
- { url = "https://files.pythonhosted.org/packages/25/c0/2aed473a4823e905e765fee3dc2cbf504bd3e68ccb1150fbdabd5c39f527/numpy-2.4.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c59020932feb24ed49ffd03704fbab89f22aa9c0d4b180ff45542fe8918f5611", size = 16655242, upload-time = "2026-03-09T07:58:20.476Z" },
- { url = "https://files.pythonhosted.org/packages/f2/c8/7e052b2fc87aa0e86de23f20e2c42bd261c624748aa8efd2c78f7bb8d8c6/numpy-2.4.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:9684823a78a6cd6ad7511fc5e25b07947d1d5b5e2812c93fe99d7d4195130720", size = 17080660, upload-time = "2026-03-09T07:58:23.067Z" },
- { url = "https://files.pythonhosted.org/packages/f3/3d/0876746044db2adcb11549f214d104f2e1be00f07a67edbb4e2812094847/numpy-2.4.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0200b25c687033316fb39f0ff4e3e690e8957a2c3c8d22499891ec58c37a3eb5", size = 18380384, upload-time = "2026-03-09T07:58:25.839Z" },
- { url = "https://files.pythonhosted.org/packages/07/12/8160bea39da3335737b10308df4f484235fd297f556745f13092aa039d3b/numpy-2.4.3-cp314-cp314t-win32.whl", hash = "sha256:5e10da9e93247e554bb1d22f8edc51847ddd7dde52d85ce31024c1b4312bfba0", size = 6154547, upload-time = "2026-03-09T07:58:28.289Z" },
- { url = "https://files.pythonhosted.org/packages/42/f3/76534f61f80d74cc9cdf2e570d3d4eeb92c2280a27c39b0aaf471eda7b48/numpy-2.4.3-cp314-cp314t-win_amd64.whl", hash = "sha256:45f003dbdffb997a03da2d1d0cb41fbd24a87507fb41605c0420a3db5bd4667b", size = 12633645, upload-time = "2026-03-09T07:58:30.384Z" },
- { url = "https://files.pythonhosted.org/packages/1f/b6/7c0d4334c15983cec7f92a69e8ce9b1e6f31857e5ee3a413ac424e6bd63d/numpy-2.4.3-cp314-cp314t-win_arm64.whl", hash = "sha256:4d382735cecd7bcf090172489a525cd7d4087bc331f7df9f60ddc9a296cf208e", size = 10565454, upload-time = "2026-03-09T07:58:33.031Z" },
- { url = "https://files.pythonhosted.org/packages/64/e4/4dab9fb43c83719c29241c535d9e07be73bea4bc0c6686c5816d8e1b6689/numpy-2.4.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:c6b124bfcafb9e8d3ed09130dbee44848c20b3e758b6bbf006e641778927c028", size = 16834892, upload-time = "2026-03-09T07:58:35.334Z" },
- { url = "https://files.pythonhosted.org/packages/c9/29/f8b6d4af90fed3dfda84ebc0df06c9833d38880c79ce954e5b661758aa31/numpy-2.4.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:76dbb9d4e43c16cf9aa711fcd8de1e2eeb27539dcefb60a1d5e9f12fae1d1ed8", size = 14893070, upload-time = "2026-03-09T07:58:37.7Z" },
- { url = "https://files.pythonhosted.org/packages/9a/04/a19b3c91dbec0a49269407f15d5753673a09832daed40c45e8150e6fa558/numpy-2.4.3-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:29363fbfa6f8ee855d7569c96ce524845e3d726d6c19b29eceec7dd555dab152", size = 5399609, upload-time = "2026-03-09T07:58:39.853Z" },
- { url = "https://files.pythonhosted.org/packages/79/34/4d73603f5420eab89ea8a67097b31364bf7c30f811d4dd84b1659c7476d9/numpy-2.4.3-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:bc71942c789ef415a37f0d4eab90341425a00d538cd0642445d30b41023d3395", size = 6714355, upload-time = "2026-03-09T07:58:42.365Z" },
- { url = "https://files.pythonhosted.org/packages/58/ad/1100d7229bb248394939a12a8074d485b655e8ed44207d328fdd7fcebc7b/numpy-2.4.3-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7e58765ad74dcebd3ef0208a5078fba32dc8ec3578fe84a604432950cd043d79", size = 15800434, upload-time = "2026-03-09T07:58:44.837Z" },
- { url = "https://files.pythonhosted.org/packages/0c/fd/16d710c085d28ba4feaf29ac60c936c9d662e390344f94a6beaa2ac9899b/numpy-2.4.3-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e236dbda4e1d319d681afcbb136c0c4a8e0f1a5c58ceec2adebb547357fe857", size = 16729409, upload-time = "2026-03-09T07:58:47.972Z" },
- { url = "https://files.pythonhosted.org/packages/57/a7/b35835e278c18b85206834b3aa3abe68e77a98769c59233d1f6300284781/numpy-2.4.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:4b42639cdde6d24e732ff823a3fa5b701d8acad89c4142bc1d0bd6dc85200ba5", size = 12504685, upload-time = "2026-03-09T07:58:50.525Z" },
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+]
+sdist = { url = "https://files.pythonhosted.org/packages/57/fd/0005efbd0af48e55eb3c7208af93f2862d4b1a56cd78e84309a2d959208d/numpy-2.4.2.tar.gz", hash = "sha256:659a6107e31a83c4e33f763942275fd278b21d095094044eb35569e86a21ddae", size = 20723651, upload-time = "2026-01-31T23:13:10.135Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/d3/44/71852273146957899753e69986246d6a176061ea183407e95418c2aa4d9a/numpy-2.4.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e7e88598032542bd49af7c4747541422884219056c268823ef6e5e89851c8825", size = 16955478, upload-time = "2026-01-31T23:10:25.623Z" },
+ { url = "https://files.pythonhosted.org/packages/74/41/5d17d4058bd0cd96bcbd4d9ff0fb2e21f52702aab9a72e4a594efa18692f/numpy-2.4.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7edc794af8b36ca37ef5fcb5e0d128c7e0595c7b96a2318d1badb6fcd8ee86b1", size = 14965467, upload-time = "2026-01-31T23:10:28.186Z" },
+ { url = "https://files.pythonhosted.org/packages/49/48/fb1ce8136c19452ed15f033f8aee91d5defe515094e330ce368a0647846f/numpy-2.4.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:6e9f61981ace1360e42737e2bae58b27bf28a1b27e781721047d84bd754d32e7", size = 5475172, upload-time = "2026-01-31T23:10:30.848Z" },
+ { url = "https://files.pythonhosted.org/packages/40/a9/3feb49f17bbd1300dd2570432961f5c8a4ffeff1db6f02c7273bd020a4c9/numpy-2.4.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:cb7bbb88aa74908950d979eeaa24dbdf1a865e3c7e45ff0121d8f70387b55f73", size = 6805145, upload-time = "2026-01-31T23:10:32.352Z" },
+ { url = "https://files.pythonhosted.org/packages/3f/39/fdf35cbd6d6e2fcad42fcf85ac04a85a0d0fbfbf34b30721c98d602fd70a/numpy-2.4.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4f069069931240b3fc703f1e23df63443dbd6390614c8c44a87d96cd0ec81eb1", size = 15966084, upload-time = "2026-01-31T23:10:34.502Z" },
+ { url = "https://files.pythonhosted.org/packages/1b/46/6fa4ea94f1ddf969b2ee941290cca6f1bfac92b53c76ae5f44afe17ceb69/numpy-2.4.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c02ef4401a506fb60b411467ad501e1429a3487abca4664871d9ae0b46c8ba32", size = 16899477, upload-time = "2026-01-31T23:10:37.075Z" },
+ { url = "https://files.pythonhosted.org/packages/09/a1/2a424e162b1a14a5bd860a464ab4e07513916a64ab1683fae262f735ccd2/numpy-2.4.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2653de5c24910e49c2b106499803124dde62a5a1fe0eedeaecf4309a5f639390", size = 17323429, upload-time = "2026-01-31T23:10:39.704Z" },
+ { url = "https://files.pythonhosted.org/packages/ce/a2/73014149ff250628df72c58204822ac01d768697913881aacf839ff78680/numpy-2.4.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1ae241bbfc6ae276f94a170b14785e561cb5e7f626b6688cf076af4110887413", size = 18635109, upload-time = "2026-01-31T23:10:41.924Z" },
+ { url = "https://files.pythonhosted.org/packages/6c/0c/73e8be2f1accd56df74abc1c5e18527822067dced5ec0861b5bb882c2ce0/numpy-2.4.2-cp311-cp311-win32.whl", hash = "sha256:df1b10187212b198dd45fa943d8985a3c8cf854aed4923796e0e019e113a1bda", size = 6237915, upload-time = "2026-01-31T23:10:45.26Z" },
+ { url = "https://files.pythonhosted.org/packages/76/ae/e0265e0163cf127c24c3969d29f1c4c64551a1e375d95a13d32eab25d364/numpy-2.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:b9c618d56a29c9cb1c4da979e9899be7578d2e0b3c24d52079c166324c9e8695", size = 12607972, upload-time = "2026-01-31T23:10:47.021Z" },
+ { url = "https://files.pythonhosted.org/packages/29/a5/c43029af9b8014d6ea157f192652c50042e8911f4300f8f6ed3336bf437f/numpy-2.4.2-cp311-cp311-win_arm64.whl", hash = "sha256:47c5a6ed21d9452b10227e5e8a0e1c22979811cad7dcc19d8e3e2fb8fa03f1a3", size = 10485763, upload-time = "2026-01-31T23:10:50.087Z" },
+ { url = "https://files.pythonhosted.org/packages/51/6e/6f394c9c77668153e14d4da83bcc247beb5952f6ead7699a1a2992613bea/numpy-2.4.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:21982668592194c609de53ba4933a7471880ccbaadcc52352694a59ecc860b3a", size = 16667963, upload-time = "2026-01-31T23:10:52.147Z" },
+ { url = "https://files.pythonhosted.org/packages/1f/f8/55483431f2b2fd015ae6ed4fe62288823ce908437ed49db5a03d15151678/numpy-2.4.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40397bda92382fcec844066efb11f13e1c9a3e2a8e8f318fb72ed8b6db9f60f1", size = 14693571, upload-time = "2026-01-31T23:10:54.789Z" },
+ { url = "https://files.pythonhosted.org/packages/2f/20/18026832b1845cdc82248208dd929ca14c9d8f2bac391f67440707fff27c/numpy-2.4.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:b3a24467af63c67829bfaa61eecf18d5432d4f11992688537be59ecd6ad32f5e", size = 5203469, upload-time = "2026-01-31T23:10:57.343Z" },
+ { url = "https://files.pythonhosted.org/packages/7d/33/2eb97c8a77daaba34eaa3fa7241a14ac5f51c46a6bd5911361b644c4a1e2/numpy-2.4.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:805cc8de9fd6e7a22da5aed858e0ab16be5a4db6c873dde1d7451c541553aa27", size = 6550820, upload-time = "2026-01-31T23:10:59.429Z" },
+ { url = "https://files.pythonhosted.org/packages/b1/91/b97fdfd12dc75b02c44e26c6638241cc004d4079a0321a69c62f51470c4c/numpy-2.4.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d82351358ffbcdcd7b686b90742a9b86632d6c1c051016484fa0b326a0a1548", size = 15663067, upload-time = "2026-01-31T23:11:01.291Z" },
+ { url = "https://files.pythonhosted.org/packages/f5/c6/a18e59f3f0b8071cc85cbc8d80cd02d68aa9710170b2553a117203d46936/numpy-2.4.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e35d3e0144137d9fdae62912e869136164534d64a169f86438bc9561b6ad49f", size = 16619782, upload-time = "2026-01-31T23:11:03.669Z" },
+ { url = "https://files.pythonhosted.org/packages/b7/83/9751502164601a79e18847309f5ceec0b1446d7b6aa12305759b72cf98b2/numpy-2.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:adb6ed2ad29b9e15321d167d152ee909ec73395901b70936f029c3bc6d7f4460", size = 17013128, upload-time = "2026-01-31T23:11:05.913Z" },
+ { url = "https://files.pythonhosted.org/packages/61/c4/c4066322256ec740acc1c8923a10047818691d2f8aec254798f3dd90f5f2/numpy-2.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8906e71fd8afcb76580404e2a950caef2685df3d2a57fe82a86ac8d33cc007ba", size = 18345324, upload-time = "2026-01-31T23:11:08.248Z" },
+ { url = "https://files.pythonhosted.org/packages/ab/af/6157aa6da728fa4525a755bfad486ae7e3f76d4c1864138003eb84328497/numpy-2.4.2-cp312-cp312-win32.whl", hash = "sha256:ec055f6dae239a6299cace477b479cca2fc125c5675482daf1dd886933a1076f", size = 5960282, upload-time = "2026-01-31T23:11:10.497Z" },
+ { url = "https://files.pythonhosted.org/packages/92/0f/7ceaaeaacb40567071e94dbf2c9480c0ae453d5bb4f52bea3892c39dc83c/numpy-2.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:209fae046e62d0ce6435fcfe3b1a10537e858249b3d9b05829e2a05218296a85", size = 12314210, upload-time = "2026-01-31T23:11:12.176Z" },
+ { url = "https://files.pythonhosted.org/packages/2f/a3/56c5c604fae6dd40fa2ed3040d005fca97e91bd320d232ac9931d77ba13c/numpy-2.4.2-cp312-cp312-win_arm64.whl", hash = "sha256:fbde1b0c6e81d56f5dccd95dd4a711d9b95df1ae4009a60887e56b27e8d903fa", size = 10220171, upload-time = "2026-01-31T23:11:14.684Z" },
+ { url = "https://files.pythonhosted.org/packages/a1/22/815b9fe25d1d7ae7d492152adbc7226d3eff731dffc38fe970589fcaaa38/numpy-2.4.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:25f2059807faea4b077a2b6837391b5d830864b3543627f381821c646f31a63c", size = 16663696, upload-time = "2026-01-31T23:11:17.516Z" },
+ { url = "https://files.pythonhosted.org/packages/09/f0/817d03a03f93ba9c6c8993de509277d84e69f9453601915e4a69554102a1/numpy-2.4.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bd3a7a9f5847d2fb8c2c6d1c862fa109c31a9abeca1a3c2bd5a64572955b2979", size = 14688322, upload-time = "2026-01-31T23:11:19.883Z" },
+ { url = "https://files.pythonhosted.org/packages/da/b4/f805ab79293c728b9a99438775ce51885fd4f31b76178767cfc718701a39/numpy-2.4.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:8e4549f8a3c6d13d55041925e912bfd834285ef1dd64d6bc7d542583355e2e98", size = 5198157, upload-time = "2026-01-31T23:11:22.375Z" },
+ { url = "https://files.pythonhosted.org/packages/74/09/826e4289844eccdcd64aac27d13b0fd3f32039915dd5b9ba01baae1f436c/numpy-2.4.2-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:aea4f66ff44dfddf8c2cffd66ba6538c5ec67d389285292fe428cb2c738c8aef", size = 6546330, upload-time = "2026-01-31T23:11:23.958Z" },
+ { url = "https://files.pythonhosted.org/packages/19/fb/cbfdbfa3057a10aea5422c558ac57538e6acc87ec1669e666d32ac198da7/numpy-2.4.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c3cd545784805de05aafe1dde61752ea49a359ccba9760c1e5d1c88a93bbf2b7", size = 15660968, upload-time = "2026-01-31T23:11:25.713Z" },
+ { url = "https://files.pythonhosted.org/packages/04/dc/46066ce18d01645541f0186877377b9371b8fa8017fa8262002b4ef22612/numpy-2.4.2-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d0d9b7c93578baafcbc5f0b83eaf17b79d345c6f36917ba0c67f45226911d499", size = 16607311, upload-time = "2026-01-31T23:11:28.117Z" },
+ { url = "https://files.pythonhosted.org/packages/14/d9/4b5adfc39a43fa6bf918c6d544bc60c05236cc2f6339847fc5b35e6cb5b0/numpy-2.4.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f74f0f7779cc7ae07d1810aab8ac6b1464c3eafb9e283a40da7309d5e6e48fbb", size = 17012850, upload-time = "2026-01-31T23:11:30.888Z" },
+ { url = "https://files.pythonhosted.org/packages/b7/20/adb6e6adde6d0130046e6fdfb7675cc62bc2f6b7b02239a09eb58435753d/numpy-2.4.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c7ac672d699bf36275c035e16b65539931347d68b70667d28984c9fb34e07fa7", size = 18334210, upload-time = "2026-01-31T23:11:33.214Z" },
+ { url = "https://files.pythonhosted.org/packages/78/0e/0a73b3dff26803a8c02baa76398015ea2a5434d9b8265a7898a6028c1591/numpy-2.4.2-cp313-cp313-win32.whl", hash = "sha256:8e9afaeb0beff068b4d9cd20d322ba0ee1cecfb0b08db145e4ab4dd44a6b5110", size = 5958199, upload-time = "2026-01-31T23:11:35.385Z" },
+ { url = "https://files.pythonhosted.org/packages/43/bc/6352f343522fcb2c04dbaf94cb30cca6fd32c1a750c06ad6231b4293708c/numpy-2.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:7df2de1e4fba69a51c06c28f5a3de36731eb9639feb8e1cf7e4a7b0daf4cf622", size = 12310848, upload-time = "2026-01-31T23:11:38.001Z" },
+ { url = "https://files.pythonhosted.org/packages/6e/8d/6da186483e308da5da1cc6918ce913dcfe14ffde98e710bfeff2a6158d4e/numpy-2.4.2-cp313-cp313-win_arm64.whl", hash = "sha256:0fece1d1f0a89c16b03442eae5c56dc0be0c7883b5d388e0c03f53019a4bfd71", size = 10221082, upload-time = "2026-01-31T23:11:40.392Z" },
+ { url = "https://files.pythonhosted.org/packages/25/a1/9510aa43555b44781968935c7548a8926274f815de42ad3997e9e83680dd/numpy-2.4.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5633c0da313330fd20c484c78cdd3f9b175b55e1a766c4a174230c6b70ad8262", size = 14815866, upload-time = "2026-01-31T23:11:42.495Z" },
+ { url = "https://files.pythonhosted.org/packages/36/30/6bbb5e76631a5ae46e7923dd16ca9d3f1c93cfa8d4ed79a129814a9d8db3/numpy-2.4.2-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d9f64d786b3b1dd742c946c42d15b07497ed14af1a1f3ce840cce27daa0ce913", size = 5325631, upload-time = "2026-01-31T23:11:44.7Z" },
+ { url = "https://files.pythonhosted.org/packages/46/00/3a490938800c1923b567b3a15cd17896e68052e2145d8662aaf3e1ffc58f/numpy-2.4.2-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:b21041e8cb6a1eb5312dd1d2f80a94d91efffb7a06b70597d44f1bd2dfc315ab", size = 6646254, upload-time = "2026-01-31T23:11:46.341Z" },
+ { url = "https://files.pythonhosted.org/packages/d3/e9/fac0890149898a9b609caa5af7455a948b544746e4b8fe7c212c8edd71f8/numpy-2.4.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:00ab83c56211a1d7c07c25e3217ea6695e50a3e2f255053686b081dc0b091a82", size = 15720138, upload-time = "2026-01-31T23:11:48.082Z" },
+ { url = "https://files.pythonhosted.org/packages/ea/5c/08887c54e68e1e28df53709f1893ce92932cc6f01f7c3d4dc952f61ffd4e/numpy-2.4.2-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2fb882da679409066b4603579619341c6d6898fc83a8995199d5249f986e8e8f", size = 16655398, upload-time = "2026-01-31T23:11:50.293Z" },
+ { url = "https://files.pythonhosted.org/packages/4d/89/253db0fa0e66e9129c745e4ef25631dc37d5f1314dad2b53e907b8538e6d/numpy-2.4.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:66cb9422236317f9d44b67b4d18f44efe6e9c7f8794ac0462978513359461554", size = 17079064, upload-time = "2026-01-31T23:11:52.927Z" },
+ { url = "https://files.pythonhosted.org/packages/2a/d5/cbade46ce97c59c6c3da525e8d95b7abe8a42974a1dc5c1d489c10433e88/numpy-2.4.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0f01dcf33e73d80bd8dc0f20a71303abbafa26a19e23f6b68d1aa9990af90257", size = 18379680, upload-time = "2026-01-31T23:11:55.22Z" },
+ { url = "https://files.pythonhosted.org/packages/40/62/48f99ae172a4b63d981babe683685030e8a3df4f246c893ea5c6ef99f018/numpy-2.4.2-cp313-cp313t-win32.whl", hash = "sha256:52b913ec40ff7ae845687b0b34d8d93b60cb66dcee06996dd5c99f2fc9328657", size = 6082433, upload-time = "2026-01-31T23:11:58.096Z" },
+ { url = "https://files.pythonhosted.org/packages/07/38/e054a61cfe48ad9f1ed0d188e78b7e26859d0b60ef21cd9de4897cdb5326/numpy-2.4.2-cp313-cp313t-win_amd64.whl", hash = "sha256:5eea80d908b2c1f91486eb95b3fb6fab187e569ec9752ab7d9333d2e66bf2d6b", size = 12451181, upload-time = "2026-01-31T23:11:59.782Z" },
+ { url = "https://files.pythonhosted.org/packages/6e/a4/a05c3a6418575e185dd84d0b9680b6bb2e2dc3e4202f036b7b4e22d6e9dc/numpy-2.4.2-cp313-cp313t-win_arm64.whl", hash = "sha256:fd49860271d52127d61197bb50b64f58454e9f578cb4b2c001a6de8b1f50b0b1", size = 10290756, upload-time = "2026-01-31T23:12:02.438Z" },
+ { url = "https://files.pythonhosted.org/packages/18/88/b7df6050bf18fdcfb7046286c6535cabbdd2064a3440fca3f069d319c16e/numpy-2.4.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:444be170853f1f9d528428eceb55f12918e4fda5d8805480f36a002f1415e09b", size = 16663092, upload-time = "2026-01-31T23:12:04.521Z" },
+ { url = "https://files.pythonhosted.org/packages/25/7a/1fee4329abc705a469a4afe6e69b1ef7e915117747886327104a8493a955/numpy-2.4.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d1240d50adff70c2a88217698ca844723068533f3f5c5fa6ee2e3220e3bdb000", size = 14698770, upload-time = "2026-01-31T23:12:06.96Z" },
+ { url = "https://files.pythonhosted.org/packages/fb/0b/f9e49ba6c923678ad5bc38181c08ac5e53b7a5754dbca8e581aa1a56b1ff/numpy-2.4.2-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:7cdde6de52fb6664b00b056341265441192d1291c130e99183ec0d4b110ff8b1", size = 5208562, upload-time = "2026-01-31T23:12:09.632Z" },
+ { url = "https://files.pythonhosted.org/packages/7d/12/d7de8f6f53f9bb76997e5e4c069eda2051e3fe134e9181671c4391677bb2/numpy-2.4.2-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:cda077c2e5b780200b6b3e09d0b42205a3d1c68f30c6dceb90401c13bff8fe74", size = 6543710, upload-time = "2026-01-31T23:12:11.969Z" },
+ { url = "https://files.pythonhosted.org/packages/09/63/c66418c2e0268a31a4cf8a8b512685748200f8e8e8ec6c507ce14e773529/numpy-2.4.2-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d30291931c915b2ab5717c2974bb95ee891a1cf22ebc16a8006bd59cd210d40a", size = 15677205, upload-time = "2026-01-31T23:12:14.33Z" },
+ { url = "https://files.pythonhosted.org/packages/5d/6c/7f237821c9642fb2a04d2f1e88b4295677144ca93285fd76eff3bcba858d/numpy-2.4.2-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bba37bc29d4d85761deed3954a1bc62be7cf462b9510b51d367b769a8c8df325", size = 16611738, upload-time = "2026-01-31T23:12:16.525Z" },
+ { url = "https://files.pythonhosted.org/packages/c2/a7/39c4cdda9f019b609b5c473899d87abff092fc908cfe4d1ecb2fcff453b0/numpy-2.4.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b2f0073ed0868db1dcd86e052d37279eef185b9c8db5bf61f30f46adac63c909", size = 17028888, upload-time = "2026-01-31T23:12:19.306Z" },
+ { url = "https://files.pythonhosted.org/packages/da/b3/e84bb64bdfea967cc10950d71090ec2d84b49bc691df0025dddb7c26e8e3/numpy-2.4.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7f54844851cdb630ceb623dcec4db3240d1ac13d4990532446761baede94996a", size = 18339556, upload-time = "2026-01-31T23:12:21.816Z" },
+ { url = "https://files.pythonhosted.org/packages/88/f5/954a291bc1192a27081706862ac62bb5920fbecfbaa302f64682aa90beed/numpy-2.4.2-cp314-cp314-win32.whl", hash = "sha256:12e26134a0331d8dbd9351620f037ec470b7c75929cb8a1537f6bfe411152a1a", size = 6006899, upload-time = "2026-01-31T23:12:24.14Z" },
+ { url = "https://files.pythonhosted.org/packages/05/cb/eff72a91b2efdd1bc98b3b8759f6a1654aa87612fc86e3d87d6fe4f948c4/numpy-2.4.2-cp314-cp314-win_amd64.whl", hash = "sha256:068cdb2d0d644cdb45670810894f6a0600797a69c05f1ac478e8d31670b8ee75", size = 12443072, upload-time = "2026-01-31T23:12:26.33Z" },
+ { url = "https://files.pythonhosted.org/packages/37/75/62726948db36a56428fce4ba80a115716dc4fad6a3a4352487f8bb950966/numpy-2.4.2-cp314-cp314-win_arm64.whl", hash = "sha256:6ed0be1ee58eef41231a5c943d7d1375f093142702d5723ca2eb07db9b934b05", size = 10494886, upload-time = "2026-01-31T23:12:28.488Z" },
+ { url = "https://files.pythonhosted.org/packages/36/2f/ee93744f1e0661dc267e4b21940870cabfae187c092e1433b77b09b50ac4/numpy-2.4.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:98f16a80e917003a12c0580f97b5f875853ebc33e2eaa4bccfc8201ac6869308", size = 14818567, upload-time = "2026-01-31T23:12:30.709Z" },
+ { url = "https://files.pythonhosted.org/packages/a7/24/6535212add7d76ff938d8bdc654f53f88d35cddedf807a599e180dcb8e66/numpy-2.4.2-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:20abd069b9cda45874498b245c8015b18ace6de8546bf50dfa8cea1696ed06ef", size = 5328372, upload-time = "2026-01-31T23:12:32.962Z" },
+ { url = "https://files.pythonhosted.org/packages/5e/9d/c48f0a035725f925634bf6b8994253b43f2047f6778a54147d7e213bc5a7/numpy-2.4.2-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:e98c97502435b53741540a5717a6749ac2ada901056c7db951d33e11c885cc7d", size = 6649306, upload-time = "2026-01-31T23:12:34.797Z" },
+ { url = "https://files.pythonhosted.org/packages/81/05/7c73a9574cd4a53a25907bad38b59ac83919c0ddc8234ec157f344d57d9a/numpy-2.4.2-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:da6cad4e82cb893db4b69105c604d805e0c3ce11501a55b5e9f9083b47d2ffe8", size = 15722394, upload-time = "2026-01-31T23:12:36.565Z" },
+ { url = "https://files.pythonhosted.org/packages/35/fa/4de10089f21fc7d18442c4a767ab156b25c2a6eaf187c0db6d9ecdaeb43f/numpy-2.4.2-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e4424677ce4b47fe73c8b5556d876571f7c6945d264201180db2dc34f676ab5", size = 16653343, upload-time = "2026-01-31T23:12:39.188Z" },
+ { url = "https://files.pythonhosted.org/packages/b8/f9/d33e4ffc857f3763a57aa85650f2e82486832d7492280ac21ba9efda80da/numpy-2.4.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:2b8f157c8a6f20eb657e240f8985cc135598b2b46985c5bccbde7616dc9c6b1e", size = 17078045, upload-time = "2026-01-31T23:12:42.041Z" },
+ { url = "https://files.pythonhosted.org/packages/c8/b8/54bdb43b6225badbea6389fa038c4ef868c44f5890f95dd530a218706da3/numpy-2.4.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5daf6f3914a733336dab21a05cdec343144600e964d2fcdabaac0c0269874b2a", size = 18380024, upload-time = "2026-01-31T23:12:44.331Z" },
+ { url = "https://files.pythonhosted.org/packages/a5/55/6e1a61ded7af8df04016d81b5b02daa59f2ea9252ee0397cb9f631efe9e5/numpy-2.4.2-cp314-cp314t-win32.whl", hash = "sha256:8c50dd1fc8826f5b26a5ee4d77ca55d88a895f4e4819c7ecc2a9f5905047a443", size = 6153937, upload-time = "2026-01-31T23:12:47.229Z" },
+ { url = "https://files.pythonhosted.org/packages/45/aa/fa6118d1ed6d776b0983f3ceac9b1a5558e80df9365b1c3aa6d42bf9eee4/numpy-2.4.2-cp314-cp314t-win_amd64.whl", hash = "sha256:fcf92bee92742edd401ba41135185866f7026c502617f422eb432cfeca4fe236", size = 12631844, upload-time = "2026-01-31T23:12:48.997Z" },
+ { url = "https://files.pythonhosted.org/packages/32/0a/2ec5deea6dcd158f254a7b372fb09cfba5719419c8d66343bab35237b3fb/numpy-2.4.2-cp314-cp314t-win_arm64.whl", hash = "sha256:1f92f53998a17265194018d1cc321b2e96e900ca52d54c7c77837b71b9465181", size = 10565379, upload-time = "2026-01-31T23:12:51.345Z" },
+ { url = "https://files.pythonhosted.org/packages/f4/f8/50e14d36d915ef64d8f8bc4a087fc8264d82c785eda6711f80ab7e620335/numpy-2.4.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:89f7268c009bc492f506abd6f5265defa7cb3f7487dc21d357c3d290add45082", size = 16833179, upload-time = "2026-01-31T23:12:53.5Z" },
+ { url = "https://files.pythonhosted.org/packages/17/17/809b5cad63812058a8189e91a1e2d55a5a18fd04611dbad244e8aeae465c/numpy-2.4.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:e6dee3bb76aa4009d5a912180bf5b2de012532998d094acee25d9cb8dee3e44a", size = 14889755, upload-time = "2026-01-31T23:12:55.933Z" },
+ { url = "https://files.pythonhosted.org/packages/3e/ea/181b9bcf7627fc8371720316c24db888dcb9829b1c0270abf3d288b2e29b/numpy-2.4.2-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:cd2bd2bbed13e213d6b55dc1d035a4f91748a7d3edc9480c13898b0353708920", size = 5399500, upload-time = "2026-01-31T23:12:58.671Z" },
+ { url = "https://files.pythonhosted.org/packages/33/9f/413adf3fc955541ff5536b78fcf0754680b3c6d95103230252a2c9408d23/numpy-2.4.2-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:cf28c0c1d4c4bf00f509fa7eb02c58d7caf221b50b467bcb0d9bbf1584d5c821", size = 6714252, upload-time = "2026-01-31T23:13:00.518Z" },
+ { url = "https://files.pythonhosted.org/packages/91/da/643aad274e29ccbdf42ecd94dafe524b81c87bcb56b83872d54827f10543/numpy-2.4.2-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e04ae107ac591763a47398bb45b568fc38f02dbc4aa44c063f67a131f99346cb", size = 15797142, upload-time = "2026-01-31T23:13:02.219Z" },
+ { url = "https://files.pythonhosted.org/packages/66/27/965b8525e9cb5dc16481b30a1b3c21e50c7ebf6e9dbd48d0c4d0d5089c7e/numpy-2.4.2-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:602f65afdef699cda27ec0b9224ae5dc43e328f4c24c689deaf77133dbee74d0", size = 16727979, upload-time = "2026-01-31T23:13:04.62Z" },
+ { url = "https://files.pythonhosted.org/packages/de/e5/b7d20451657664b07986c2f6e3be564433f5dcaf3482d68eaecd79afaf03/numpy-2.4.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:be71bf1edb48ebbbf7f6337b5bfd2f895d1902f6335a5830b20141fc126ffba0", size = 12502577, upload-time = "2026-01-31T23:13:07.08Z" },
+]
+
+[[package]]
+name = "nv-grouped-gemm"
+version = "1.1.4.post8"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "absl-py" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "torch", marker = "sys_platform == 'never'" },
]
+sdist = { url = "https://files.pythonhosted.org/packages/02/ad/046a097b63a96c1ba1d85f0031dbe7fcbdb33e6c445dfbaba2ffaefdd497/nv_grouped_gemm-1.1.4.post8.tar.gz", hash = "sha256:ab321693f0292cfd8a26dc7b6f14decd9eb00e209494de7218e4fad36191275d", size = 20821209, upload-time = "2025-12-17T02:22:38.432Z" }
[[package]]
name = "nv-one-logger-core"
@@ -3591,183 +3576,58 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/8a/86/94188e03e5d4dd7b73c390b0cddcde5618b3799c18e327b2bf15763f6137/nvdlfw_inspect-0.2.2-py3-none-any.whl", hash = "sha256:8a4dc2814c5a4cd19ae304170b9bfa514538ef3c3eb243a45a82404ec3cb279d", size = 30964, upload-time = "2025-12-03T10:52:01.933Z" },
]
-[[package]]
-name = "nvidia-cublas-cu12"
-version = "12.8.4.1"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/29/99/db44d685f0e257ff0e213ade1964fc459b4a690a73293220e98feb3307cf/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0", size = 590537124, upload-time = "2025-03-07T01:43:53.556Z" },
- { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" },
- { url = "https://files.pythonhosted.org/packages/70/61/7d7b3c70186fb651d0fbd35b01dbfc8e755f69fd58f817f3d0f642df20c3/nvidia_cublas_cu12-12.8.4.1-py3-none-win_amd64.whl", hash = "sha256:47e9b82132fa8d2b4944e708049229601448aaad7e6f296f630f2d1a32de35af", size = 567544208, upload-time = "2025-03-07T01:53:30.535Z" },
-]
-
-[[package]]
-name = "nvidia-cuda-cupti-cu12"
-version = "12.8.90"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/d5/1f/b3bd73445e5cb342727fd24fe1f7b748f690b460acadc27ea22f904502c8/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed", size = 9533318, upload-time = "2025-03-07T01:40:10.421Z" },
- { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" },
- { url = "https://files.pythonhosted.org/packages/41/bc/83f5426095d93694ae39fe1311431b5d5a9bb82e48bf0dd8e19be2765942/nvidia_cuda_cupti_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:bb479dcdf7e6d4f8b0b01b115260399bf34154a1a2e9fe11c85c517d87efd98e", size = 7015759, upload-time = "2025-03-07T01:51:11.355Z" },
-]
-
-[[package]]
-name = "nvidia-cuda-nvrtc-cu12"
-version = "12.8.93"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" },
- { url = "https://files.pythonhosted.org/packages/eb/d1/e50d0acaab360482034b84b6e27ee83c6738f7d32182b987f9c7a4e32962/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8", size = 43106076, upload-time = "2025-03-07T01:41:59.817Z" },
- { url = "https://files.pythonhosted.org/packages/45/51/52a3d84baa2136cc8df15500ad731d74d3a1114d4c123e043cb608d4a32b/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:7a4b6b2904850fe78e0bd179c4b655c404d4bb799ef03ddc60804247099ae909", size = 73586838, upload-time = "2025-03-07T01:52:13.483Z" },
-]
-
-[[package]]
-name = "nvidia-cuda-runtime-cu12"
-version = "12.8.90"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/7c/75/f865a3b236e4647605ea34cc450900854ba123834a5f1598e160b9530c3a/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d", size = 965265, upload-time = "2025-03-07T01:39:43.533Z" },
- { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" },
- { url = "https://files.pythonhosted.org/packages/30/a5/a515b7600ad361ea14bfa13fb4d6687abf500adc270f19e89849c0590492/nvidia_cuda_runtime_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:c0c6027f01505bfed6c3b21ec546f69c687689aad5f1a377554bc6ca4aa993a8", size = 944318, upload-time = "2025-03-07T01:51:01.794Z" },
-]
-
-[[package]]
-name = "nvidia-cudnn-cu12"
-version = "9.10.2.21"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
- { name = "nvidia-cublas-cu12" },
-]
-wheels = [
- { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878, upload-time = "2025-06-06T21:52:51.348Z" },
- { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" },
- { url = "https://files.pythonhosted.org/packages/3d/90/0bd6e586701b3a890fd38aa71c387dab4883d619d6e5ad912ccbd05bfd67/nvidia_cudnn_cu12-9.10.2.21-py3-none-win_amd64.whl", hash = "sha256:c6288de7d63e6cf62988f0923f96dc339cea362decb1bf5b3141883392a7d65e", size = 692992268, upload-time = "2025-06-06T21:55:18.114Z" },
-]
-
[[package]]
name = "nvidia-cudnn-frontend"
-version = "1.19.1"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/e8/b3/cc16cafc95c85b0c2c2dd33628822315b409c1866eff3385558842277b45/nvidia_cudnn_frontend-1.19.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ddc753d312e74ba4c0e2528a796411cdb93af3950811226e6f2b6e4420b97b74", size = 2268847, upload-time = "2026-03-11T05:38:41.047Z" },
- { url = "https://files.pythonhosted.org/packages/dd/32/13c3c7fb2d476efe1a615732f9104da6a0b1026a8a24cc4a7bbc6815c560/nvidia_cudnn_frontend-1.19.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ef4db77a06b0c1ba2bdea41dd056d4ab2ec7a291876153c2148b978729a40b4", size = 2413688, upload-time = "2026-03-11T05:43:25.223Z" },
- { url = "https://files.pythonhosted.org/packages/70/51/b46174e161e6a3296b94eb44737db4029ab1486eb488031e02203e6dadd8/nvidia_cudnn_frontend-1.19.1-cp310-cp310-win_amd64.whl", hash = "sha256:035dc1f8766181b83723e61f1c20de9ba39ffedba6f37fbf996305eeec3cdadb", size = 1857694, upload-time = "2026-03-11T05:26:14.784Z" },
- { url = "https://files.pythonhosted.org/packages/84/f9/e81193aeb8d68ca004a18e4b9f36f8779402298a2034030f7ad057c495bb/nvidia_cudnn_frontend-1.19.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:25b79095b4d8e8ba131eb864e367d5c497a702db96ef50ca5e0ce7292335ca68", size = 2269006, upload-time = "2026-03-11T05:39:31.074Z" },
- { url = "https://files.pythonhosted.org/packages/55/c5/b9d7d54f6c62f556995c1302066a1afab6a1ee012e70525ac9ca165d4706/nvidia_cudnn_frontend-1.19.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:241fba15057135fed04223c5e7171a44bb407cb67d706255f072192553984792", size = 2415721, upload-time = "2026-03-11T05:43:48.875Z" },
- { url = "https://files.pythonhosted.org/packages/e3/a8/91e8b75f94978423f357e80d440b812f118dca1969b0e2d12bf035a29f7e/nvidia_cudnn_frontend-1.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:35bea3a533281c64361d094d3c07d0a536ebd25cafccf76ab9b1ba8df6d93c39", size = 1858706, upload-time = "2026-03-11T05:26:37.765Z" },
- { url = "https://files.pythonhosted.org/packages/0b/75/ccede2c3e9d422ca32900fd41fcbe461a329b1202a13ccd135ea46047539/nvidia_cudnn_frontend-1.19.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f288b3475b961fc69daa6ca1d738ccf3ab589ad0f7a59762462c7b00237f3633", size = 2271847, upload-time = "2026-03-11T05:40:32.402Z" },
- { url = "https://files.pythonhosted.org/packages/45/02/b2bfbc85bcfba46471c321cd50a3451179911d161acc8380578436a5fb09/nvidia_cudnn_frontend-1.19.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c4f02adabf32c0ff305eaaa87c7cf27baea3ac396b01affffcfdcecc95f55fa6", size = 2418004, upload-time = "2026-03-11T05:44:12.055Z" },
- { url = "https://files.pythonhosted.org/packages/b3/36/0a020d9b959a32d9aa9b784d68c857a5e8dc09581fac05426178c0812d90/nvidia_cudnn_frontend-1.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:353814c052345c6fa3b06a904102c9a23c04a9ab66c3cb83077f5fec80930b3f", size = 1859291, upload-time = "2026-03-11T05:27:02.324Z" },
- { url = "https://files.pythonhosted.org/packages/16/96/732ea16f16bd08566ac9988b967e64334e067ceb50824a9e498ab65bed88/nvidia_cudnn_frontend-1.19.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5726bbda42a839aae0c9604272dcabbf298cfd8f3a9984c4b36161543abfb63b", size = 2272166, upload-time = "2026-03-11T05:41:24.968Z" },
- { url = "https://files.pythonhosted.org/packages/55/f5/79b5bb595ac86c2dae658bbb2f19c92ae495592a3d98b8f401a1cbb050cd/nvidia_cudnn_frontend-1.19.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:451e57f95a44dceb9a7cfc6842cc0e645f3f3a44f8f4ccd1e7501f06ce4361d3", size = 2418047, upload-time = "2026-03-11T05:44:35.65Z" },
- { url = "https://files.pythonhosted.org/packages/3e/c1/891ed3920d83bbe05cf74460f38d6f43e027a2b7ad63b4307e3d9c345d32/nvidia_cudnn_frontend-1.19.1-cp313-cp313-win_amd64.whl", hash = "sha256:409ec11e845f7fa14dbeb242f27080e4848cab7f66c3ad8d857051af0216bcb7", size = 1859093, upload-time = "2026-03-11T05:27:25.796Z" },
- { url = "https://files.pythonhosted.org/packages/58/d3/fb0cdc6d778c0650182b1090b465ecd14c96b7ae607d1c15bb130abd611b/nvidia_cudnn_frontend-1.19.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3f14d616587b2ef0949bd33ce7d37481ec424558dd56941b11a6098a7a840e47", size = 2274486, upload-time = "2026-03-11T05:42:16.69Z" },
- { url = "https://files.pythonhosted.org/packages/3e/d7/f18fc19d0c6d32b0617de29e180a198455f1caf491cbeadb5cc5ede6fd70/nvidia_cudnn_frontend-1.19.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05b19c76e3ce2edfa9d17f8b7ee1c6d3b00cb389b95739121ac6736df15f6747", size = 2419992, upload-time = "2026-03-11T05:44:58.889Z" },
- { url = "https://files.pythonhosted.org/packages/17/54/bc6926cc9038dc1f64d3a398ecaa78cb925985e6058bf28895c450d82ccc/nvidia_cudnn_frontend-1.19.1-cp314-cp314-win_amd64.whl", hash = "sha256:65fefc992d7398ac6e98347c7f3d74a1b8122012893d41c75ea034cb43b51788", size = 1860874, upload-time = "2026-03-11T05:27:48.581Z" },
-]
-
-[[package]]
-name = "nvidia-cufft-cu12"
-version = "11.3.3.83"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
- { name = "nvidia-nvjitlink-cu12" },
-]
-wheels = [
- { url = "https://files.pythonhosted.org/packages/60/bc/7771846d3a0272026c416fbb7e5f4c1f146d6d80704534d0b187dd6f4800/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a", size = 193109211, upload-time = "2025-03-07T01:44:56.873Z" },
- { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" },
- { url = "https://files.pythonhosted.org/packages/7d/ec/ce1629f1e478bb5ccd208986b5f9e0316a78538dd6ab1d0484f012f8e2a1/nvidia_cufft_cu12-11.3.3.83-py3-none-win_amd64.whl", hash = "sha256:7a64a98ef2a7c47f905aaf8931b69a3a43f27c55530c698bb2ed7c75c0b42cb7", size = 192216559, upload-time = "2025-03-07T01:53:57.106Z" },
-]
-
-[[package]]
-name = "nvidia-cufile-cu12"
-version = "1.13.1.3"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" },
- { url = "https://files.pythonhosted.org/packages/1e/f5/5607710447a6fe9fd9b3283956fceeee8a06cda1d2f56ce31371f595db2a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a", size = 1120705, upload-time = "2025-03-07T01:45:41.434Z" },
-]
-
-[[package]]
-name = "nvidia-curand-cu12"
-version = "10.3.9.90"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/45/5e/92aa15eca622a388b80fbf8375d4760738df6285b1e92c43d37390a33a9a/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd", size = 63625754, upload-time = "2025-03-07T01:46:10.735Z" },
- { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" },
- { url = "https://files.pythonhosted.org/packages/b9/75/70c05b2f3ed5be3bb30b7102b6eb78e100da4bbf6944fd6725c012831cab/nvidia_curand_cu12-10.3.9.90-py3-none-win_amd64.whl", hash = "sha256:f149a8ca457277da854f89cf282d6ef43176861926c7ac85b2a0fbd237c587ec", size = 62765309, upload-time = "2025-03-07T01:54:20.478Z" },
-]
-
-[[package]]
-name = "nvidia-cusolver-cu12"
-version = "11.7.3.90"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
- { name = "nvidia-cublas-cu12" },
- { name = "nvidia-cusparse-cu12" },
- { name = "nvidia-nvjitlink-cu12" },
-]
-wheels = [
- { url = "https://files.pythonhosted.org/packages/c8/32/f7cd6ce8a7690544d084ea21c26e910a97e077c9b7f07bf5de623ee19981/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0", size = 267229841, upload-time = "2025-03-07T01:46:54.356Z" },
- { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" },
- { url = "https://files.pythonhosted.org/packages/13/c0/76ca8551b8a84146ffa189fec81c26d04adba4bc0dbe09cd6e6fd9b7de04/nvidia_cusolver_cu12-11.7.3.90-py3-none-win_amd64.whl", hash = "sha256:4a550db115fcabc4d495eb7d39ac8b58d4ab5d8e63274d3754df1c0ad6a22d34", size = 256720438, upload-time = "2025-03-07T01:54:39.898Z" },
-]
-
-[[package]]
-name = "nvidia-cusparse-cu12"
-version = "12.5.8.93"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
- { name = "nvidia-nvjitlink-cu12" },
-]
-wheels = [
- { url = "https://files.pythonhosted.org/packages/bc/f7/cd777c4109681367721b00a106f491e0d0d15cfa1fd59672ce580ce42a97/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc", size = 288117129, upload-time = "2025-03-07T01:47:40.407Z" },
- { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" },
- { url = "https://files.pythonhosted.org/packages/62/07/f3b2ad63f8e3d257a599f422ae34eb565e70c41031aecefa3d18b62cabd1/nvidia_cusparse_cu12-12.5.8.93-py3-none-win_amd64.whl", hash = "sha256:9a33604331cb2cac199f2e7f5104dfbb8a5a898c367a53dfda9ff2acb6b6b4dd", size = 284937404, upload-time = "2025-03-07T01:55:07.742Z" },
-]
-
-[[package]]
-name = "nvidia-cusparselt-cu12"
-version = "0.7.1"
+version = "1.18.0"
source = { registry = "https://pypi.org/simple" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/73/b9/598f6ff36faaece4b3c50d26f50e38661499ff34346f00e057760b35cc9d/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5", size = 283835557, upload-time = "2025-02-26T00:16:54.265Z" },
- { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" },
- { url = "https://files.pythonhosted.org/packages/2f/d8/a6b0d0d0c2435e9310f3e2bb0d9c9dd4c33daef86aa5f30b3681defd37ea/nvidia_cusparselt_cu12-0.7.1-py3-none-win_amd64.whl", hash = "sha256:f67fbb5831940ec829c9117b7f33807db9f9678dc2a617fbe781cac17b4e1075", size = 271020911, upload-time = "2025-02-26T00:14:47.204Z" },
+ { url = "https://files.pythonhosted.org/packages/86/be/f5a1e633c524c13c0182213ab27dab42dca29a3c785be5ff74d2d185aed1/nvidia_cudnn_frontend-1.18.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:baa6fbc8e7c55f1c78c0374ed9a890e1cf81acaca0c92d6135d18a8e3c985244", size = 2023500, upload-time = "2026-01-27T23:31:34.747Z" },
+ { url = "https://files.pythonhosted.org/packages/82/a7/765a17c6a9496196c34f269d17dfb902b6c618c0261c0962511e95302e81/nvidia_cudnn_frontend-1.18.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e4bcca42259e358002c8867e3624a558f66cd5dff2cc6c3aafd860ef2f41730", size = 2154278, upload-time = "2026-01-27T23:06:55.784Z" },
+ { url = "https://files.pythonhosted.org/packages/19/a1/7caae2243540bc60e47eae95f0fd913c9baa05cf94df0471914f70d45158/nvidia_cudnn_frontend-1.18.0-cp310-cp310-win_amd64.whl", hash = "sha256:06252021ef1e5a7256f1e70429a426b01792636c05cc547fe8e64c6885a9652e", size = 1590158, upload-time = "2026-01-27T23:08:26.703Z" },
+ { url = "https://files.pythonhosted.org/packages/e2/9a/83d3d080118de4a7810fa019349edec634b8b37b9cafaacd05719de62dd6/nvidia_cudnn_frontend-1.18.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f6d4d0b88d617b233a503c84980b54d840b60b2734497d1a7a071ec5293daec2", size = 2023709, upload-time = "2026-01-27T23:32:10.912Z" },
+ { url = "https://files.pythonhosted.org/packages/13/c7/c3624b3ed77b102618f26295e816b27f1c3ebb1143730237a9f51d403c3f/nvidia_cudnn_frontend-1.18.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:382ea063b92cbfd5b442cb75ff8422932d78276aecf139e46713ed1ad3d07af4", size = 2155568, upload-time = "2026-01-27T23:07:13.277Z" },
+ { url = "https://files.pythonhosted.org/packages/52/dd/8613dfd029d076b86a8a87efe3f4bb4ab73cec15fa8fc27e665098f4d167/nvidia_cudnn_frontend-1.18.0-cp311-cp311-win_amd64.whl", hash = "sha256:baa509effc4d299d3f04e549d4188f88bca8a8b527f483cbd2f66bc18f13a8b1", size = 1591244, upload-time = "2026-01-27T23:08:44.691Z" },
+ { url = "https://files.pythonhosted.org/packages/e3/b4/604e230378680ee117849a4e1045baca092f93161a829291a84d5acce70c/nvidia_cudnn_frontend-1.18.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:310b417f2848a83d1437203fcaeea320a74fb7f28af20bf42bf5afc9c01f1c12", size = 2027408, upload-time = "2026-01-27T23:32:46.576Z" },
+ { url = "https://files.pythonhosted.org/packages/c6/52/08f98262e77b1cbcc834cc1a5db494d0661ea1dbdea58c2e2d51a57fdaca/nvidia_cudnn_frontend-1.18.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c023539ca6de99234cf5102c3ec0d6af817f5396fc93028a22ba5b834a35b8a", size = 2159245, upload-time = "2026-01-27T23:07:32.664Z" },
+ { url = "https://files.pythonhosted.org/packages/aa/1f/751a5a8cfdc95fb4dc556192d37369ae488c30c473fe9a3ec720b23d07ea/nvidia_cudnn_frontend-1.18.0-cp312-cp312-win_amd64.whl", hash = "sha256:e13f7dd46cdb4762dde87f181f06d1c5e15e9478bbdd547bfa74d9b11f415aae", size = 1591041, upload-time = "2026-01-27T23:09:04.118Z" },
+ { url = "https://files.pythonhosted.org/packages/e8/bd/db791a26ebb6a6e1268f518e18c82d8ad18546f7008f4b0d5bde15f927de/nvidia_cudnn_frontend-1.18.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5a6e2b7bd43705ffa4af3b187374fdd5e7d09fc228a4d65fc8b4b0a537a8e605", size = 2027249, upload-time = "2026-01-27T23:33:22.46Z" },
+ { url = "https://files.pythonhosted.org/packages/19/74/3038cf496d5de7cfdff730f5202e438c17d9123de507059340e02ddff9d7/nvidia_cudnn_frontend-1.18.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c0544206b02cae9da4f044ca3fe7416b99e0c8a8052285dd3e5a8fc445d34f9c", size = 2160001, upload-time = "2026-01-27T23:07:50.248Z" },
+ { url = "https://files.pythonhosted.org/packages/a1/5e/148cc6609dba326e620e4d949246020dfba05ca07d0387442e62b71d19b6/nvidia_cudnn_frontend-1.18.0-cp313-cp313-win_amd64.whl", hash = "sha256:7eefa5f10cc003df5f3593f82f1ee6c001fc3412bdc78430c751914dfceefd7f", size = 1591270, upload-time = "2026-01-27T23:09:21.435Z" },
+ { url = "https://files.pythonhosted.org/packages/a3/0a/515209dd2afc6027bf1112bf415f575bfe9628d18877abe7424cb597dd7b/nvidia_cudnn_frontend-1.18.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b489da1b30f1d7da822b37b89cc4f68afd80e020eb57e4ab24921f8b57f6e946", size = 2028689, upload-time = "2026-02-11T21:32:04.235Z" },
+ { url = "https://files.pythonhosted.org/packages/ab/57/52d18e1f50979eeabfafb408ec73068afc5a1e1ccd21636240317cd456d4/nvidia_cudnn_frontend-1.18.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:37688c81a34ac590aff9de4c34d2968bab949411af707baa327616ebd4b34ae1", size = 2160182, upload-time = "2026-02-11T21:25:18.437Z" },
+ { url = "https://files.pythonhosted.org/packages/67/53/df2810b56d259ef96fa6beaa1381bd14c29fbe82836b409516e864c5e177/nvidia_cudnn_frontend-1.18.0-cp314-cp314-win_amd64.whl", hash = "sha256:5053b473fa74168b5fbf35934cd6187f88aa03b8447b9f2cd417332d5e5c9569", size = 1592759, upload-time = "2026-02-11T21:32:33.87Z" },
]
[[package]]
name = "nvidia-cutlass-dsl"
-version = "4.4.2"
+version = "4.4.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "nvidia-cutlass-dsl-libs-base" },
]
wheels = [
- { url = "https://files.pythonhosted.org/packages/a9/03/678dab0383db1ddfc449da216220f40404189eb36eeed9d87a4fa4bdb0e6/nvidia_cutlass_dsl-4.4.2-py3-none-any.whl", hash = "sha256:7cfb9ef19062b055b9372c7a627004724e2755e4c8b16c3cc88807d64501a4ae", size = 10167, upload-time = "2026-03-16T02:18:59.043Z" },
+ { url = "https://files.pythonhosted.org/packages/55/09/42fca58af350265131b6f8665ad5b62526c95e6692788460bd5306d3efe2/nvidia_cutlass_dsl-4.4.0-py3-none-any.whl", hash = "sha256:2d1f34333e4d774002d44b53262d71aaf738700fcf3858290629f9a7b374c61c", size = 10168, upload-time = "2026-02-14T03:38:54.267Z" },
]
[[package]]
name = "nvidia-cutlass-dsl-libs-base"
-version = "4.4.2"
+version = "4.4.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cuda-python" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
{ name = "typing-extensions" },
]
wheels = [
- { url = "https://files.pythonhosted.org/packages/5f/07/af1b456b5b6dd4a49e71a952a182a99fc863f70b9f78725324f89e0384e5/nvidia_cutlass_dsl_libs_base-4.4.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:06acb3acff3dcf4bf6630476efac7de94de30b988ded4fa00b647bbcec4224ff", size = 75471025, upload-time = "2026-03-16T02:23:49.61Z" },
- { url = "https://files.pythonhosted.org/packages/b1/12/f0770811d2874af7e04623d3baa83c445c49f38c00c4e5d20e1daae54b5d/nvidia_cutlass_dsl_libs_base-4.4.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:916bf612fba5fbc5162e300fe18196e960dac2328c1c1360c0939d3be05c7c71", size = 74355272, upload-time = "2026-03-16T02:24:44.22Z" },
- { url = "https://files.pythonhosted.org/packages/60/bf/b9d0fd1ba281b111c941d9616dd9f98a509d84bf35076e60fef27ec7abd6/nvidia_cutlass_dsl_libs_base-4.4.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:261832dafe7579dc83cd3816ab9ea845e3de3737d876c215f01fb4edff1f4473", size = 75476977, upload-time = "2026-03-16T02:26:40.932Z" },
- { url = "https://files.pythonhosted.org/packages/a5/23/86dda6d69a3fc29d0cde2a8b54c056ad69b73a6e5e230e18d906d2ec3b7c/nvidia_cutlass_dsl_libs_base-4.4.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:40c2352b2fcc80789a216cbeb9b2ee10c85c15de839cda8f5c1d18166b8249df", size = 74356100, upload-time = "2026-03-16T02:26:12.778Z" },
- { url = "https://files.pythonhosted.org/packages/8e/7d/0df5e38d11e52cc72095a14d6448bc1c5d0d4b00b069a1189ca417fb225b/nvidia_cutlass_dsl_libs_base-4.4.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:2ec8812eeadcbb6fe20bda2e295ed9c00653f8253b78e33cf0ab65a47b829e73", size = 75473821, upload-time = "2026-03-16T02:27:08.371Z" },
- { url = "https://files.pythonhosted.org/packages/56/98/e264964741d9cc9816625d9600d17a5249fd5cbd8c2d166fb0d0c34dfe5a/nvidia_cutlass_dsl_libs_base-4.4.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:22e37b58f7a6f2f43bba533c4df8a088012122e0b4e9a632eca23937adeafb39", size = 74355593, upload-time = "2026-03-16T02:25:11.762Z" },
- { url = "https://files.pythonhosted.org/packages/1b/c9/2f17950ee2deb4b5f6b82f8155515a21792fe296e81bb638f164d8e2ca9b/nvidia_cutlass_dsl_libs_base-4.4.2-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:b59a052cbfb9a25747d1b6d413615456bea38d1f377da085af07c0d86a4c8b39", size = 75477304, upload-time = "2026-03-16T02:27:35.645Z" },
- { url = "https://files.pythonhosted.org/packages/e1/68/27380038ebd9c8eab4be364e833fea144aef597704f44948921668f7adf4/nvidia_cutlass_dsl_libs_base-4.4.2-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8e3324a33afa7424e93beae7e54a311e80db82b9e4ed4bba2aeeda1d6c888cd9", size = 74355765, upload-time = "2026-03-16T02:24:16.778Z" },
- { url = "https://files.pythonhosted.org/packages/12/44/0dc7f2e5b5c65106a5bb05e60654f1a79abe92e27e9b00588a73cd26ca1f/nvidia_cutlass_dsl_libs_base-4.4.2-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:af96c1170569138b3cb965202907fbf5ab95d7c1dcc210952d00cdf9ab7b859a", size = 75472171, upload-time = "2026-03-16T02:28:03.136Z" },
- { url = "https://files.pythonhosted.org/packages/4b/ae/0998f328b28b956d7eb399d16f4ee681ca318b306007264444a623e86c64/nvidia_cutlass_dsl_libs_base-4.4.2-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:95db0c8d1d56992e2f5c2dcd5b3baab0297bedc0cbcefc1e70b57acd934e7b23", size = 74356280, upload-time = "2026-03-16T02:25:43.789Z" },
+ { url = "https://files.pythonhosted.org/packages/ad/af/cf64251bae66077769adbcd9a2e96b86aeb3c41490c5ee0a939a1a3b511e/nvidia_cutlass_dsl_libs_base-4.4.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:703169d0843ad7e310b397aa95128e3fa983571a9a488f826c2968f3e71df2b8", size = 75460001, upload-time = "2026-02-14T03:44:18.705Z" },
+ { url = "https://files.pythonhosted.org/packages/87/94/42af69f7de79658d45116a32f5b6c9d5cfc37a37d989f057445c20db9b1e/nvidia_cutlass_dsl_libs_base-4.4.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:264fc34a096bd144ebb8ff0f1fcd5eeeaa9d30528cfd801141a9f7856a58b95a", size = 74345534, upload-time = "2026-02-14T03:47:04.545Z" },
+ { url = "https://files.pythonhosted.org/packages/ec/08/1b1481b382f0bfddb91fe19c425dae7ffcb0dacb19a60d4fa490f19cabdf/nvidia_cutlass_dsl_libs_base-4.4.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:18249a0c13a7b7fe08fbf600ce38a871538067cfe7b20ef2bc131a5902a67377", size = 75457259, upload-time = "2026-02-14T03:44:48.408Z" },
+ { url = "https://files.pythonhosted.org/packages/1a/2f/4d525af7805a7cf04f25efd9900d9acca1d6a8973f436b6058dfec5b545f/nvidia_cutlass_dsl_libs_base-4.4.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:c09ee076f2b61ba26523686f550a2c642a35ec178861a5e0a38f2979ad515604", size = 74345003, upload-time = "2026-02-14T03:46:37.751Z" },
+ { url = "https://files.pythonhosted.org/packages/33/34/63a1dce4d65cd6fd29b9d50286abbfcdd965c3ca2156cf423eda2ab1fc5d/nvidia_cutlass_dsl_libs_base-4.4.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9cde72efb065d9bea29a92ca85835eaedec20bf89af22798d2d2a551ccd51731", size = 75458501, upload-time = "2026-02-14T03:45:15.866Z" },
+ { url = "https://files.pythonhosted.org/packages/cf/ae/5bbd3c9d7909d64a7f139b480c70ff3220554f64775e941c95438265ef1f/nvidia_cutlass_dsl_libs_base-4.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:e31a2fcc9854417242ee072c9b8fd1257d5ee422166dfd85eb3f8784fee34dd8", size = 74345995, upload-time = "2026-02-14T03:45:42.9Z" },
+ { url = "https://files.pythonhosted.org/packages/48/5c/c76ec134e0fbd4ee2f31b32e1fbcb727e7f6323d136a3fc7a8ea3aa3e75d/nvidia_cutlass_dsl_libs_base-4.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:ad63fe382b36f69f2a9b51d35e95cbcb240565d06a990e5a19a8eacae49c8b94", size = 75456473, upload-time = "2026-02-14T03:43:51.005Z" },
+ { url = "https://files.pythonhosted.org/packages/32/22/65c0abbc8518d3f80b5d8adbd8cec640f16f8c0620b01cfbecbfd14d6899/nvidia_cutlass_dsl_libs_base-4.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:b0eb94678159f750db6bf214d79e0b815e9b5a53fad3925fda53e1591cbdeb0d", size = 74345762, upload-time = "2026-02-14T03:46:09.745Z" },
]
[[package]]
@@ -3789,12 +3649,12 @@ wheels = [
[[package]]
name = "nvidia-modelopt"
-version = "0.42.0"
+version = "0.41.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "ninja" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
{ name = "nvidia-ml-py" },
{ name = "packaging" },
{ name = "pulp" },
@@ -3808,51 +3668,13 @@ dependencies = [
{ name = "tqdm" },
]
wheels = [
- { url = "https://files.pythonhosted.org/packages/61/a4/ad7f8d4ce21e1df1670aaaa05db45bece34a74c9fb44e4e77f668a24adce/nvidia_modelopt-0.42.0-py3-none-any.whl", hash = "sha256:3e8149b4d206b4ae51165f4f6a6d28fc9c2172406c948d5abcd8637b08db5c28", size = 1005332, upload-time = "2026-03-09T20:43:57.936Z" },
-]
-
-[[package]]
-name = "nvidia-nccl-cu12"
-version = "2.27.5"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/bb/1c/857979db0ef194ca5e21478a0612bcdbbe59458d7694361882279947b349/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:31432ad4d1fb1004eb0c56203dc9bc2178a1ba69d1d9e02d64a6938ab5e40e7a", size = 322400625, upload-time = "2025-06-26T04:11:04.496Z" },
- { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" },
-]
-
-[[package]]
-name = "nvidia-nvjitlink-cu12"
-version = "12.8.93"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" },
- { url = "https://files.pythonhosted.org/packages/2a/a2/8cee5da30d13430e87bf99bb33455d2724d0a4a9cb5d7926d80ccb96d008/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7", size = 38386204, upload-time = "2025-03-07T01:49:43.612Z" },
- { url = "https://files.pythonhosted.org/packages/ed/d7/34f02dad2e30c31b10a51f6b04e025e5dd60e5f936af9045a9b858a05383/nvidia_nvjitlink_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:bd93fbeeee850917903583587f4fc3a4eafa022e34572251368238ab5e6bd67f", size = 268553710, upload-time = "2025-03-07T01:56:24.13Z" },
-]
-
-[[package]]
-name = "nvidia-nvshmem-cu12"
-version = "3.4.5"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/1d/6a/03aa43cc9bd3ad91553a88b5f6fb25ed6a3752ae86ce2180221962bc2aa5/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b48363fc6964dede448029434c6abed6c5e37f823cb43c3bcde7ecfc0457e15", size = 138936938, upload-time = "2025-09-06T00:32:05.589Z" },
- { url = "https://files.pythonhosted.org/packages/b5/09/6ea3ea725f82e1e76684f0708bbedd871fc96da89945adeba65c3835a64c/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:042f2500f24c021db8a06c5eec2539027d57460e1c1a762055a6554f72c369bd", size = 139103095, upload-time = "2025-09-06T00:32:31.266Z" },
-]
-
-[[package]]
-name = "nvidia-nvtx-cu12"
-version = "12.8.90"
-source = { registry = "https://pypi.org/simple" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/10/c0/1b303feea90d296f6176f32a2a70b5ef230f9bdeb3a72bddb0dc922dc137/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615", size = 91161, upload-time = "2025-03-07T01:42:23.922Z" },
- { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" },
- { url = "https://files.pythonhosted.org/packages/9f/99/4c9c0c329bf9fc125008c3b54c7c94c0023518d06fc025ae36431375e1fe/nvidia_nvtx_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:619c8304aedc69f02ea82dd244541a83c3d9d40993381b3b590f1adaed3db41e", size = 56492, upload-time = "2025-03-07T01:52:24.69Z" },
+ { url = "https://files.pythonhosted.org/packages/16/09/30147ab0d0409d3492f1d37469fe0586c82aeec6eec9a907f59d24094516/nvidia_modelopt-0.41.0-py3-none-any.whl", hash = "sha256:ffa5f903d22653649318831a470550ae55ee04716c068d5ade61c3176fdc1d7d", size = 934582, upload-time = "2026-01-20T17:21:28.494Z" },
]
[[package]]
name = "nvidia-resiliency-ext"
version = "0.5.0"
-source = { git = "https://github.com/NVIDIA/nvidia-resiliency-ext.git?rev=v0.5.0#5eb5f7ec84e9aa1bf45c403b06d6ef766ea6784a" }
+source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "defusedxml" },
{ name = "nv-one-logger-core" },
@@ -3863,6 +3685,14 @@ dependencies = [
{ name = "pyyaml" },
{ name = "torch", marker = "sys_platform == 'never'" },
]
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/df/18/1898cad3bdd643c6bfa5f7aee125a5ef308ab1701ab15106e3e9c66bb416/nvidia_resiliency_ext-0.5.0-cp310-cp310-manylinux_2_39_aarch64.whl", hash = "sha256:97d4b68d3949f3b8370addb474d8662d6ac5008c3c1296420cdeb93a88d6a804", size = 402915, upload-time = "2025-11-13T21:28:34.578Z" },
+ { url = "https://files.pythonhosted.org/packages/fa/48/10fc3f278898e3b2aacc3bea65f0ac4b579e6e0e8447b467742d75adeec1/nvidia_resiliency_ext-0.5.0-cp310-cp310-manylinux_2_39_x86_64.whl", hash = "sha256:ceb04ec5a7bc9301fd6f14449bda6b0d1f37ead4fbe37aa3bf1d7b2ad5b662d4", size = 406483, upload-time = "2025-11-13T21:28:58.732Z" },
+ { url = "https://files.pythonhosted.org/packages/14/17/c19dfed8d4aced307a1c1404f0917ee6c1b319db8092b3cfe2af4e76de6d/nvidia_resiliency_ext-0.5.0-cp311-cp311-manylinux_2_39_aarch64.whl", hash = "sha256:62d396356adcf898cb86a54956eeece29017a41b5872db0b364c8449d23f2f66", size = 404062, upload-time = "2025-11-13T21:29:46.873Z" },
+ { url = "https://files.pythonhosted.org/packages/7f/99/b4324595171c3cdffb03cef070006ab9a3de7fca90a22403576ec6423b69/nvidia_resiliency_ext-0.5.0-cp311-cp311-manylinux_2_39_x86_64.whl", hash = "sha256:c4fcd006ef69300f753bb30d17efbb6bcee6699f044e3532209b2825d22e9977", size = 407027, upload-time = "2025-11-13T21:30:09.124Z" },
+ { url = "https://files.pythonhosted.org/packages/8c/73/232d9f25558f3c6165ff1d15c980a434b47c13e8f527f999cd265859abcf/nvidia_resiliency_ext-0.5.0-cp312-cp312-manylinux_2_39_aarch64.whl", hash = "sha256:81e3d827885e90bed369e67f76dda6709dd4073c2e5fa1228df85d6987cee495", size = 403317, upload-time = "2025-11-13T21:31:24.603Z" },
+ { url = "https://files.pythonhosted.org/packages/44/89/4d7f39416aa3be72ee9f1260a7af56af40f2570f5add1e039d96279a8764/nvidia_resiliency_ext-0.5.0-cp312-cp312-manylinux_2_39_x86_64.whl", hash = "sha256:eb720cd25feabef07f971d4051c7bcac2f9ec73642a9031953d2663307950cb9", size = 407963, upload-time = "2025-11-13T21:30:28.998Z" },
+]
[[package]]
name = "nvidia-sphinx-theme"
@@ -3927,7 +3757,7 @@ source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "ml-dtypes" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
{ name = "protobuf" },
{ name = "typing-extensions" },
]
@@ -3964,7 +3794,7 @@ source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "ml-dtypes" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
{ name = "onnx" },
{ name = "sympy" },
{ name = "typing-extensions" },
@@ -3981,7 +3811,7 @@ source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "ml-dtypes" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
{ name = "onnx" },
{ name = "onnx-ir" },
{ name = "packaging" },
@@ -3994,7 +3824,7 @@ wheels = [
[[package]]
name = "openai"
-version = "2.28.0"
+version = "2.23.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@@ -4006,9 +3836,9 @@ dependencies = [
{ name = "tqdm" },
{ name = "typing-extensions" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/56/87/eb0abb4ef88ddb95b3c13149384c4c288f584f3be17d6a4f63f8c3e3c226/openai-2.28.0.tar.gz", hash = "sha256:bb7fdff384d2a787fa82e8822d1dd3c02e8cf901d60f1df523b7da03cbb6d48d", size = 670334, upload-time = "2026-03-13T19:56:27.306Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/61/4b/dc1d84b8237205ebe48a1b1c9c3a8e1ab9fd08b30811b6d787196df58fd6/openai-2.23.0.tar.gz", hash = "sha256:7d24cc8087d5e8eed58e98aaa823391d39d12f9a9a2755770f67c7bb2004d94c", size = 657323, upload-time = "2026-02-24T03:20:20.323Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/c0/5a/df122348638885526e53140e9c6b0d844af7312682b3bde9587eebc28b47/openai-2.28.0-py3-none-any.whl", hash = "sha256:79aa5c45dba7fef84085701c235cf13ba88485e1ef4f8dfcedc44fc2a698fc1d", size = 1141218, upload-time = "2026-03-13T19:56:25.46Z" },
+ { url = "https://files.pythonhosted.org/packages/1d/5f/bcdf0fb510c24f021e485f920677da363cd59d6e0310171bf2cad6e052b5/openai-2.23.0-py3-none-any.whl", hash = "sha256:1041d40bebf845053fda1946104f8bf9c3e2df957a41c3878c55c72c352630e9", size = 1118971, upload-time = "2026-02-24T03:20:18.708Z" },
]
[package.optional-dependencies]
@@ -4045,19 +3875,24 @@ name = "opentelemetry-api"
version = "1.33.1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
"python_full_version >= '3.14' and sys_platform == 'win32'",
"python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'win32'",
"python_full_version == '3.12.*' and sys_platform == 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'emscripten'",
"python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
"python_full_version == '3.11.*' and sys_platform == 'win32'",
"python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version < '3.11'",
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
]
dependencies = [
{ name = "deprecated", marker = "extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" },
@@ -4070,30 +3905,35 @@ wheels = [
[[package]]
name = "opentelemetry-api"
-version = "1.40.0"
+version = "1.39.1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
"python_full_version >= '3.14' and sys_platform == 'win32'",
"python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'win32'",
"python_full_version == '3.12.*' and sys_platform == 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'emscripten'",
"python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
"python_full_version == '3.11.*' and sys_platform == 'win32'",
"python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version < '3.11'",
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
]
dependencies = [
{ name = "importlib-metadata", marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" },
{ name = "typing-extensions", marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/2c/1d/4049a9e8698361cc1a1aa03a6c59e4fa4c71e0c0f94a30f988a6876a2ae6/opentelemetry_api-1.40.0.tar.gz", hash = "sha256:159be641c0b04d11e9ecd576906462773eb97ae1b657730f0ecf64d32071569f", size = 70851, upload-time = "2026-03-04T14:17:21.555Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/97/b9/3161be15bb8e3ad01be8be5a968a9237c3027c5be504362ff800fca3e442/opentelemetry_api-1.39.1.tar.gz", hash = "sha256:fbde8c80e1b937a2c61f20347e91c0c18a1940cecf012d62e65a7caf08967c9c", size = 65767, upload-time = "2025-12-11T13:32:39.182Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/5f/bf/93795954016c522008da367da292adceed71cca6ee1717e1d64c83089099/opentelemetry_api-1.40.0-py3-none-any.whl", hash = "sha256:82dd69331ae74b06f6a874704be0cfaa49a1650e1537d4a813b86ecef7d0ecf9", size = 68676, upload-time = "2026-03-04T14:17:01.24Z" },
+ { url = "https://files.pythonhosted.org/packages/cf/df/d3f1ddf4bb4cb50ed9b1139cc7b1c54c34a1e7ce8fd1b9a37c0d1551a6bd/opentelemetry_api-1.39.1-py3-none-any.whl", hash = "sha256:2edd8463432a7f8443edce90972169b195e7d6a05500cd29e6d13898187c9950", size = 66356, upload-time = "2025-12-11T13:32:17.304Z" },
]
[[package]]
@@ -4101,19 +3941,24 @@ name = "opentelemetry-exporter-prometheus"
version = "0.54b1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
"python_full_version >= '3.14' and sys_platform == 'win32'",
"python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'win32'",
"python_full_version == '3.12.*' and sys_platform == 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'emscripten'",
"python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
"python_full_version == '3.11.*' and sys_platform == 'win32'",
"python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version < '3.11'",
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
]
dependencies = [
{ name = "opentelemetry-api", version = "1.33.1", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" },
@@ -4127,43 +3972,48 @@ wheels = [
[[package]]
name = "opentelemetry-exporter-prometheus"
-version = "0.61b0"
+version = "0.60b1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
"python_full_version >= '3.14' and sys_platform == 'win32'",
"python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'win32'",
"python_full_version == '3.12.*' and sys_platform == 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'emscripten'",
"python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
"python_full_version == '3.11.*' and sys_platform == 'win32'",
"python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version < '3.11'",
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
]
dependencies = [
- { name = "opentelemetry-api", version = "1.40.0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" },
- { name = "opentelemetry-sdk", version = "1.40.0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" },
+ { name = "opentelemetry-api", version = "1.39.1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" },
+ { name = "opentelemetry-sdk", version = "1.39.1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" },
{ name = "prometheus-client", marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/4a/20/9e818fd364d12e8d0cfdce4a3b2d82e24d98c4ceebb315de6b6770b5f214/opentelemetry_exporter_prometheus-0.61b0.tar.gz", hash = "sha256:7c4919bd8e79abd62b610767e80f42c9c3a06c5183f4dd9141eedeb57aea284b", size = 15136, upload-time = "2026-03-04T14:17:26.275Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/14/39/7dafa6fff210737267bed35a8855b6ac7399b9e582b8cf1f25f842517012/opentelemetry_exporter_prometheus-0.60b1.tar.gz", hash = "sha256:a4011b46906323f71724649d301b4dc188aaa068852e814f4df38cc76eac616b", size = 14976, upload-time = "2025-12-11T13:32:42.944Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/02/4a/b65d40e94d1d930aee73a1a2857211ee6ab10ce3686cbdae5eea78cd9d34/opentelemetry_exporter_prometheus-0.61b0-py3-none-any.whl", hash = "sha256:3013b41f4370143d48d219a2351473761423e5882fa4c213811eaefacba39cb7", size = 13149, upload-time = "2026-03-04T14:17:08.983Z" },
+ { url = "https://files.pythonhosted.org/packages/9b/0d/4be6bf5477a3eb3d917d2f17d3c0b6720cd6cb97898444a61d43cc983f5c/opentelemetry_exporter_prometheus-0.60b1-py3-none-any.whl", hash = "sha256:49f59178de4f4590e3cef0b8b95cf6e071aae70e1f060566df5546fad773b8fd", size = 13019, upload-time = "2025-12-11T13:32:23.974Z" },
]
[[package]]
name = "opentelemetry-proto"
-version = "1.40.0"
+version = "1.39.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "protobuf" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/4c/77/dd38991db037fdfce45849491cb61de5ab000f49824a00230afb112a4392/opentelemetry_proto-1.40.0.tar.gz", hash = "sha256:03f639ca129ba513f5819810f5b1f42bcb371391405d99c168fe6937c62febcd", size = 45667, upload-time = "2026-03-04T14:17:31.194Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/49/1d/f25d76d8260c156c40c97c9ed4511ec0f9ce353f8108ca6e7561f82a06b2/opentelemetry_proto-1.39.1.tar.gz", hash = "sha256:6c8e05144fc0d3ed4d22c2289c6b126e03bcd0e6a7da0f16cedd2e1c2772e2c8", size = 46152, upload-time = "2025-12-11T13:32:48.681Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/b9/b2/189b2577dde745b15625b3214302605b1353436219d42b7912e77fa8dc24/opentelemetry_proto-1.40.0-py3-none-any.whl", hash = "sha256:266c4385d88923a23d63e353e9761af0f47a6ed0d486979777fe4de59dc9b25f", size = 72073, upload-time = "2026-03-04T14:17:16.673Z" },
+ { url = "https://files.pythonhosted.org/packages/51/95/b40c96a7b5203005a0b03d8ce8cd212ff23f1793d5ba289c87a097571b18/opentelemetry_proto-1.39.1-py3-none-any.whl", hash = "sha256:22cdc78efd3b3765d09e68bfbd010d4fc254c9818afd0b6b423387d9dee46007", size = 72535, upload-time = "2025-12-11T13:32:33.866Z" },
]
[[package]]
@@ -4171,19 +4021,24 @@ name = "opentelemetry-sdk"
version = "1.33.1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
"python_full_version >= '3.14' and sys_platform == 'win32'",
"python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'win32'",
"python_full_version == '3.12.*' and sys_platform == 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'emscripten'",
"python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
"python_full_version == '3.11.*' and sys_platform == 'win32'",
"python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version < '3.11'",
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
]
dependencies = [
{ name = "opentelemetry-api", version = "1.33.1", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" },
@@ -4197,31 +4052,36 @@ wheels = [
[[package]]
name = "opentelemetry-sdk"
-version = "1.40.0"
+version = "1.39.1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
"python_full_version >= '3.14' and sys_platform == 'win32'",
"python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'win32'",
"python_full_version == '3.12.*' and sys_platform == 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'emscripten'",
"python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
"python_full_version == '3.11.*' and sys_platform == 'win32'",
"python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version < '3.11'",
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
]
dependencies = [
- { name = "opentelemetry-api", version = "1.40.0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" },
- { name = "opentelemetry-semantic-conventions", version = "0.61b0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" },
+ { name = "opentelemetry-api", version = "1.39.1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" },
+ { name = "opentelemetry-semantic-conventions", version = "0.60b1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" },
{ name = "typing-extensions", marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/58/fd/3c3125b20ba18ce2155ba9ea74acb0ae5d25f8cd39cfd37455601b7955cc/opentelemetry_sdk-1.40.0.tar.gz", hash = "sha256:18e9f5ec20d859d268c7cb3c5198c8d105d073714db3de50b593b8c1345a48f2", size = 184252, upload-time = "2026-03-04T14:17:31.87Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/eb/fb/c76080c9ba07e1e8235d24cdcc4d125ef7aa3edf23eb4e497c2e50889adc/opentelemetry_sdk-1.39.1.tar.gz", hash = "sha256:cf4d4563caf7bff906c9f7967e2be22d0d6b349b908be0d90fb21c8e9c995cc6", size = 171460, upload-time = "2025-12-11T13:32:49.369Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/2c/c5/6a852903d8bfac758c6dc6e9a68b015d3c33f2f1be5e9591e0f4b69c7e0a/opentelemetry_sdk-1.40.0-py3-none-any.whl", hash = "sha256:787d2154a71f4b3d81f20524a8ce061b7db667d24e46753f32a7bc48f1c1f3f1", size = 141951, upload-time = "2026-03-04T14:17:17.961Z" },
+ { url = "https://files.pythonhosted.org/packages/7c/98/e91cf858f203d86f4eccdf763dcf01cf03f1dae80c3750f7e635bfa206b6/opentelemetry_sdk-1.39.1-py3-none-any.whl", hash = "sha256:4d5482c478513ecb0a5d938dcc61394e647066e0cc2676bee9f3af3f3f45f01c", size = 132565, upload-time = "2025-12-11T13:32:35.069Z" },
]
[[package]]
@@ -4229,19 +4089,24 @@ name = "opentelemetry-semantic-conventions"
version = "0.54b1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
"python_full_version >= '3.14' and sys_platform == 'win32'",
"python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'win32'",
"python_full_version == '3.12.*' and sys_platform == 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'emscripten'",
"python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
"python_full_version == '3.11.*' and sys_platform == 'win32'",
"python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version < '3.11'",
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
]
dependencies = [
{ name = "deprecated", marker = "extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" },
@@ -4254,111 +4119,35 @@ wheels = [
[[package]]
name = "opentelemetry-semantic-conventions"
-version = "0.61b0"
+version = "0.60b1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
"python_full_version >= '3.14' and sys_platform == 'win32'",
"python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'win32'",
"python_full_version == '3.12.*' and sys_platform == 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'emscripten'",
"python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
"python_full_version == '3.11.*' and sys_platform == 'win32'",
"python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version < '3.11'",
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
]
dependencies = [
- { name = "opentelemetry-api", version = "1.40.0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" },
+ { name = "opentelemetry-api", version = "1.39.1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" },
{ name = "typing-extensions", marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/6d/c0/4ae7973f3c2cfd2b6e321f1675626f0dab0a97027cc7a297474c9c8f3d04/opentelemetry_semantic_conventions-0.61b0.tar.gz", hash = "sha256:072f65473c5d7c6dc0355b27d6c9d1a679d63b6d4b4b16a9773062cb7e31192a", size = 145755, upload-time = "2026-03-04T14:17:32.664Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/b2/37/cc6a55e448deaa9b27377d087da8615a3416d8ad523d5960b78dbeadd02a/opentelemetry_semantic_conventions-0.61b0-py3-none-any.whl", hash = "sha256:fa530a96be229795f8cef353739b618148b0fe2b4b3f005e60e262926c4d38e2", size = 231621, upload-time = "2026-03-04T14:17:19.33Z" },
-]
-
-[[package]]
-name = "orjson"
-version = "3.11.7"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/53/45/b268004f745ede84e5798b48ee12b05129d19235d0e15267aa57dcdb400b/orjson-3.11.7.tar.gz", hash = "sha256:9b1a67243945819ce55d24a30b59d6a168e86220452d2c96f4d1f093e71c0c49", size = 6144992, upload-time = "2026-02-02T15:38:49.29Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/de/1a/a373746fa6d0e116dd9e54371a7b54622c44d12296d5d0f3ad5e3ff33490/orjson-3.11.7-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:a02c833f38f36546ba65a452127633afce4cf0dd7296b753d3bb54e55e5c0174", size = 229140, upload-time = "2026-02-02T15:37:06.082Z" },
- { url = "https://files.pythonhosted.org/packages/52/a2/fa129e749d500f9b183e8a3446a193818a25f60261e9ce143ad61e975208/orjson-3.11.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b63c6e6738d7c3470ad01601e23376aa511e50e1f3931395b9f9c722406d1a67", size = 128670, upload-time = "2026-02-02T15:37:08.002Z" },
- { url = "https://files.pythonhosted.org/packages/08/93/1e82011cd1e0bd051ef9d35bed1aa7fb4ea1f0a055dc2c841b46b43a9ebd/orjson-3.11.7-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:043d3006b7d32c7e233b8cfb1f01c651013ea079e08dcef7189a29abd8befe11", size = 123832, upload-time = "2026-02-02T15:37:09.191Z" },
- { url = "https://files.pythonhosted.org/packages/fe/d8/a26b431ef962c7d55736674dddade876822f3e33223c1f47a36879350d04/orjson-3.11.7-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57036b27ac8a25d81112eb0cc9835cd4833c5b16e1467816adc0015f59e870dc", size = 129171, upload-time = "2026-02-02T15:37:11.112Z" },
- { url = "https://files.pythonhosted.org/packages/a7/19/f47819b84a580f490da260c3ee9ade214cf4cf78ac9ce8c1c758f80fdfc9/orjson-3.11.7-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:733ae23ada68b804b222c44affed76b39e30806d38660bf1eb200520d259cc16", size = 141967, upload-time = "2026-02-02T15:37:12.282Z" },
- { url = "https://files.pythonhosted.org/packages/5b/cd/37ece39a0777ba077fdcdbe4cccae3be8ed00290c14bf8afdc548befc260/orjson-3.11.7-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5fdfad2093bdd08245f2e204d977facd5f871c88c4a71230d5bcbd0e43bf6222", size = 130991, upload-time = "2026-02-02T15:37:13.465Z" },
- { url = "https://files.pythonhosted.org/packages/8f/ed/f2b5d66aa9b6b5c02ff5f120efc7b38c7c4962b21e6be0f00fd99a5c348e/orjson-3.11.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cededd6738e1c153530793998e31c05086582b08315db48ab66649768f326baa", size = 133674, upload-time = "2026-02-02T15:37:14.694Z" },
- { url = "https://files.pythonhosted.org/packages/c4/6e/baa83e68d1aa09fa8c3e5b2c087d01d0a0bd45256de719ed7bc22c07052d/orjson-3.11.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:14f440c7268c8f8633d1b3d443a434bd70cb15686117ea6beff8fdc8f5917a1e", size = 138722, upload-time = "2026-02-02T15:37:16.501Z" },
- { url = "https://files.pythonhosted.org/packages/0c/47/7f8ef4963b772cd56999b535e553f7eb5cd27e9dd6c049baee6f18bfa05d/orjson-3.11.7-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:3a2479753bbb95b0ebcf7969f562cdb9668e6d12416a35b0dda79febf89cdea2", size = 409056, upload-time = "2026-02-02T15:37:17.895Z" },
- { url = "https://files.pythonhosted.org/packages/38/eb/2df104dd2244b3618f25325a656f85cc3277f74bbd91224752410a78f3c7/orjson-3.11.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:71924496986275a737f38e3f22b4e0878882b3f7a310d2ff4dc96e812789120c", size = 144196, upload-time = "2026-02-02T15:37:19.349Z" },
- { url = "https://files.pythonhosted.org/packages/b6/2a/ee41de0aa3a6686598661eae2b4ebdff1340c65bfb17fcff8b87138aab21/orjson-3.11.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b4a9eefdc70bf8bf9857f0290f973dec534ac84c35cd6a7f4083be43e7170a8f", size = 134979, upload-time = "2026-02-02T15:37:20.906Z" },
- { url = "https://files.pythonhosted.org/packages/4c/fa/92fc5d3d402b87a8b28277a9ed35386218a6a5287c7fe5ee9b9f02c53fb2/orjson-3.11.7-cp310-cp310-win32.whl", hash = "sha256:ae9e0b37a834cef7ce8f99de6498f8fad4a2c0bf6bfc3d02abd8ed56aa15b2de", size = 127968, upload-time = "2026-02-02T15:37:23.178Z" },
- { url = "https://files.pythonhosted.org/packages/07/29/a576bf36d73d60df06904d3844a9df08e25d59eba64363aaf8ec2f9bff41/orjson-3.11.7-cp310-cp310-win_amd64.whl", hash = "sha256:d772afdb22555f0c58cfc741bdae44180122b3616faa1ecadb595cd526e4c993", size = 125128, upload-time = "2026-02-02T15:37:24.329Z" },
- { url = "https://files.pythonhosted.org/packages/37/02/da6cb01fc6087048d7f61522c327edf4250f1683a58a839fdcc435746dd5/orjson-3.11.7-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9487abc2c2086e7c8eb9a211d2ce8855bae0e92586279d0d27b341d5ad76c85c", size = 228664, upload-time = "2026-02-02T15:37:25.542Z" },
- { url = "https://files.pythonhosted.org/packages/c1/c2/5885e7a5881dba9a9af51bc564e8967225a642b3e03d089289a35054e749/orjson-3.11.7-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:79cacb0b52f6004caf92405a7e1f11e6e2de8bdf9019e4f76b44ba045125cd6b", size = 125344, upload-time = "2026-02-02T15:37:26.92Z" },
- { url = "https://files.pythonhosted.org/packages/a4/1d/4e7688de0a92d1caf600dfd5fb70b4c5bfff51dfa61ac555072ef2d0d32a/orjson-3.11.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2e85fe4698b6a56d5e2ebf7ae87544d668eb6bde1ad1226c13f44663f20ec9e", size = 128404, upload-time = "2026-02-02T15:37:28.108Z" },
- { url = "https://files.pythonhosted.org/packages/2f/b2/ec04b74ae03a125db7bd69cffd014b227b7f341e3261bf75b5eb88a1aa92/orjson-3.11.7-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b8d14b71c0b12963fe8a62aac87119f1afdf4cb88a400f61ca5ae581449efcb5", size = 123677, upload-time = "2026-02-02T15:37:30.287Z" },
- { url = "https://files.pythonhosted.org/packages/4c/69/f95bdf960605f08f827f6e3291fe243d8aa9c5c9ff017a8d7232209184c3/orjson-3.11.7-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91c81ef070c8f3220054115e1ef468b1c9ce8497b4e526cb9f68ab4dc0a7ac62", size = 128950, upload-time = "2026-02-02T15:37:31.595Z" },
- { url = "https://files.pythonhosted.org/packages/a4/1b/de59c57bae1d148ef298852abd31909ac3089cff370dfd4cd84cc99cbc42/orjson-3.11.7-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:411ebaf34d735e25e358a6d9e7978954a9c9d58cfb47bc6683cdc3964cd2f910", size = 141756, upload-time = "2026-02-02T15:37:32.985Z" },
- { url = "https://files.pythonhosted.org/packages/ee/9e/9decc59f4499f695f65c650f6cfa6cd4c37a3fbe8fa235a0a3614cb54386/orjson-3.11.7-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a16bcd08ab0bcdfc7e8801d9c4a9cc17e58418e4d48ddc6ded4e9e4b1a94062b", size = 130812, upload-time = "2026-02-02T15:37:34.204Z" },
- { url = "https://files.pythonhosted.org/packages/28/e6/59f932bcabd1eac44e334fe8e3281a92eacfcb450586e1f4bde0423728d8/orjson-3.11.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c0b51672e466fd7e56230ffbae7f1639e18d0ce023351fb75da21b71bc2c960", size = 133444, upload-time = "2026-02-02T15:37:35.446Z" },
- { url = "https://files.pythonhosted.org/packages/f1/36/b0f05c0eaa7ca30bc965e37e6a2956b0d67adb87a9872942d3568da846ae/orjson-3.11.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:136dcd6a2e796dfd9ffca9fc027d778567b0b7c9968d092842d3c323cef88aa8", size = 138609, upload-time = "2026-02-02T15:37:36.657Z" },
- { url = "https://files.pythonhosted.org/packages/b8/03/58ec7d302b8d86944c60c7b4b82975d5161fcce4c9bc8c6cb1d6741b6115/orjson-3.11.7-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:7ba61079379b0ae29e117db13bda5f28d939766e410d321ec1624afc6a0b0504", size = 408918, upload-time = "2026-02-02T15:37:38.076Z" },
- { url = "https://files.pythonhosted.org/packages/06/3a/868d65ef9a8b99be723bd510de491349618abd9f62c826cf206d962db295/orjson-3.11.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:0527a4510c300e3b406591b0ba69b5dc50031895b0a93743526a3fc45f59d26e", size = 143998, upload-time = "2026-02-02T15:37:39.706Z" },
- { url = "https://files.pythonhosted.org/packages/5b/c7/1e18e1c83afe3349f4f6dc9e14910f0ae5f82eac756d1412ea4018938535/orjson-3.11.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a709e881723c9b18acddcfb8ba357322491ad553e277cf467e1e7e20e2d90561", size = 134802, upload-time = "2026-02-02T15:37:41.002Z" },
- { url = "https://files.pythonhosted.org/packages/d4/0b/ccb7ee1a65b37e8eeb8b267dc953561d72370e85185e459616d4345bab34/orjson-3.11.7-cp311-cp311-win32.whl", hash = "sha256:c43b8b5bab288b6b90dac410cca7e986a4fa747a2e8f94615aea407da706980d", size = 127828, upload-time = "2026-02-02T15:37:42.241Z" },
- { url = "https://files.pythonhosted.org/packages/af/9e/55c776dffda3f381e0f07d010a4f5f3902bf48eaba1bb7684d301acd4924/orjson-3.11.7-cp311-cp311-win_amd64.whl", hash = "sha256:6543001328aa857187f905308a028935864aefe9968af3848401b6fe80dbb471", size = 124941, upload-time = "2026-02-02T15:37:43.444Z" },
- { url = "https://files.pythonhosted.org/packages/aa/8e/424a620fa7d263b880162505fb107ef5e0afaa765b5b06a88312ac291560/orjson-3.11.7-cp311-cp311-win_arm64.whl", hash = "sha256:1ee5cc7160a821dfe14f130bc8e63e7611051f964b463d9e2a3a573204446a4d", size = 126245, upload-time = "2026-02-02T15:37:45.18Z" },
- { url = "https://files.pythonhosted.org/packages/80/bf/76f4f1665f6983385938f0e2a5d7efa12a58171b8456c252f3bae8a4cf75/orjson-3.11.7-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:bd03ea7606833655048dab1a00734a2875e3e86c276e1d772b2a02556f0d895f", size = 228545, upload-time = "2026-02-02T15:37:46.376Z" },
- { url = "https://files.pythonhosted.org/packages/79/53/6c72c002cb13b5a978a068add59b25a8bdf2800ac1c9c8ecdb26d6d97064/orjson-3.11.7-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:89e440ebc74ce8ab5c7bc4ce6757b4a6b1041becb127df818f6997b5c71aa60b", size = 125224, upload-time = "2026-02-02T15:37:47.697Z" },
- { url = "https://files.pythonhosted.org/packages/2c/83/10e48852865e5dd151bdfe652c06f7da484578ed02c5fca938e3632cb0b8/orjson-3.11.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ede977b5fe5ac91b1dffc0a517ca4542d2ec8a6a4ff7b2652d94f640796342a", size = 128154, upload-time = "2026-02-02T15:37:48.954Z" },
- { url = "https://files.pythonhosted.org/packages/6e/52/a66e22a2b9abaa374b4a081d410edab6d1e30024707b87eab7c734afe28d/orjson-3.11.7-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b7b1dae39230a393df353827c855a5f176271c23434cfd2db74e0e424e693e10", size = 123548, upload-time = "2026-02-02T15:37:50.187Z" },
- { url = "https://files.pythonhosted.org/packages/de/38/605d371417021359f4910c496f764c48ceb8997605f8c25bf1dfe58c0ebe/orjson-3.11.7-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed46f17096e28fb28d2975834836a639af7278aa87c84f68ab08fbe5b8bd75fa", size = 129000, upload-time = "2026-02-02T15:37:51.426Z" },
- { url = "https://files.pythonhosted.org/packages/44/98/af32e842b0ffd2335c89714d48ca4e3917b42f5d6ee5537832e069a4b3ac/orjson-3.11.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3726be79e36e526e3d9c1aceaadbfb4a04ee80a72ab47b3f3c17fefb9812e7b8", size = 141686, upload-time = "2026-02-02T15:37:52.607Z" },
- { url = "https://files.pythonhosted.org/packages/96/0b/fc793858dfa54be6feee940c1463370ece34b3c39c1ca0aa3845f5ba9892/orjson-3.11.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0724e265bc548af1dedebd9cb3d24b4e1c1e685a343be43e87ba922a5c5fff2f", size = 130812, upload-time = "2026-02-02T15:37:53.944Z" },
- { url = "https://files.pythonhosted.org/packages/dc/91/98a52415059db3f374757d0b7f0f16e3b5cd5976c90d1c2b56acaea039e6/orjson-3.11.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7745312efa9e11c17fbd3cb3097262d079da26930ae9ae7ba28fb738367cbad", size = 133440, upload-time = "2026-02-02T15:37:55.615Z" },
- { url = "https://files.pythonhosted.org/packages/dc/b6/cb540117bda61791f46381f8c26c8f93e802892830a6055748d3bb1925ab/orjson-3.11.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f904c24bdeabd4298f7a977ef14ca2a022ca921ed670b92ecd16ab6f3d01f867", size = 138386, upload-time = "2026-02-02T15:37:56.814Z" },
- { url = "https://files.pythonhosted.org/packages/63/1a/50a3201c334a7f17c231eee5f841342190723794e3b06293f26e7cf87d31/orjson-3.11.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b9fc4d0f81f394689e0814617aadc4f2ea0e8025f38c226cbf22d3b5ddbf025d", size = 408853, upload-time = "2026-02-02T15:37:58.291Z" },
- { url = "https://files.pythonhosted.org/packages/87/cd/8de1c67d0be44fdc22701e5989c0d015a2adf391498ad42c4dc589cd3013/orjson-3.11.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:849e38203e5be40b776ed2718e587faf204d184fc9a008ae441f9442320c0cab", size = 144130, upload-time = "2026-02-02T15:38:00.163Z" },
- { url = "https://files.pythonhosted.org/packages/0f/fe/d605d700c35dd55f51710d159fc54516a280923cd1b7e47508982fbb387d/orjson-3.11.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4682d1db3bcebd2b64757e0ddf9e87ae5f00d29d16c5cdf3a62f561d08cc3dd2", size = 134818, upload-time = "2026-02-02T15:38:01.507Z" },
- { url = "https://files.pythonhosted.org/packages/e4/e4/15ecc67edb3ddb3e2f46ae04475f2d294e8b60c1825fbe28a428b93b3fbd/orjson-3.11.7-cp312-cp312-win32.whl", hash = "sha256:f4f7c956b5215d949a1f65334cf9d7612dde38f20a95f2315deef167def91a6f", size = 127923, upload-time = "2026-02-02T15:38:02.75Z" },
- { url = "https://files.pythonhosted.org/packages/34/70/2e0855361f76198a3965273048c8e50a9695d88cd75811a5b46444895845/orjson-3.11.7-cp312-cp312-win_amd64.whl", hash = "sha256:bf742e149121dc5648ba0a08ea0871e87b660467ef168a3a5e53bc1fbd64bb74", size = 125007, upload-time = "2026-02-02T15:38:04.032Z" },
- { url = "https://files.pythonhosted.org/packages/68/40/c2051bd19fc467610fed469dc29e43ac65891571138f476834ca192bc290/orjson-3.11.7-cp312-cp312-win_arm64.whl", hash = "sha256:26c3b9132f783b7d7903bf1efb095fed8d4a3a85ec0d334ee8beff3d7a4749d5", size = 126089, upload-time = "2026-02-02T15:38:05.297Z" },
- { url = "https://files.pythonhosted.org/packages/89/25/6e0e52cac5aab51d7b6dcd257e855e1dec1c2060f6b28566c509b4665f62/orjson-3.11.7-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:1d98b30cc1313d52d4af17d9c3d307b08389752ec5f2e5febdfada70b0f8c733", size = 228390, upload-time = "2026-02-02T15:38:06.8Z" },
- { url = "https://files.pythonhosted.org/packages/a5/29/a77f48d2fc8a05bbc529e5ff481fb43d914f9e383ea2469d4f3d51df3d00/orjson-3.11.7-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:d897e81f8d0cbd2abb82226d1860ad2e1ab3ff16d7b08c96ca00df9d45409ef4", size = 125189, upload-time = "2026-02-02T15:38:08.181Z" },
- { url = "https://files.pythonhosted.org/packages/89/25/0a16e0729a0e6a1504f9d1a13cdd365f030068aab64cec6958396b9969d7/orjson-3.11.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:814be4b49b228cfc0b3c565acf642dd7d13538f966e3ccde61f4f55be3e20785", size = 128106, upload-time = "2026-02-02T15:38:09.41Z" },
- { url = "https://files.pythonhosted.org/packages/66/da/a2e505469d60666a05ab373f1a6322eb671cb2ba3a0ccfc7d4bc97196787/orjson-3.11.7-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d06e5c5fed5caedd2e540d62e5b1c25e8c82431b9e577c33537e5fa4aa909539", size = 123363, upload-time = "2026-02-02T15:38:10.73Z" },
- { url = "https://files.pythonhosted.org/packages/23/bf/ed73f88396ea35c71b38961734ea4a4746f7ca0768bf28fd551d37e48dd0/orjson-3.11.7-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:31c80ce534ac4ea3739c5ee751270646cbc46e45aea7576a38ffec040b4029a1", size = 129007, upload-time = "2026-02-02T15:38:12.138Z" },
- { url = "https://files.pythonhosted.org/packages/73/3c/b05d80716f0225fc9008fbf8ab22841dcc268a626aa550561743714ce3bf/orjson-3.11.7-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f50979824bde13d32b4320eedd513431c921102796d86be3eee0b58e58a3ecd1", size = 141667, upload-time = "2026-02-02T15:38:13.398Z" },
- { url = "https://files.pythonhosted.org/packages/61/e8/0be9b0addd9bf86abfc938e97441dcd0375d494594b1c8ad10fe57479617/orjson-3.11.7-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9e54f3808e2b6b945078c41aa8d9b5834b28c50843846e97807e5adb75fa9705", size = 130832, upload-time = "2026-02-02T15:38:14.698Z" },
- { url = "https://files.pythonhosted.org/packages/c9/ec/c68e3b9021a31d9ec15a94931db1410136af862955854ed5dd7e7e4f5bff/orjson-3.11.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a12b80df61aab7b98b490fe9e4879925ba666fccdfcd175252ce4d9035865ace", size = 133373, upload-time = "2026-02-02T15:38:16.109Z" },
- { url = "https://files.pythonhosted.org/packages/d2/45/f3466739aaafa570cc8e77c6dbb853c48bf56e3b43738020e2661e08b0ac/orjson-3.11.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:996b65230271f1a97026fd0e6a753f51fbc0c335d2ad0c6201f711b0da32693b", size = 138307, upload-time = "2026-02-02T15:38:17.453Z" },
- { url = "https://files.pythonhosted.org/packages/e1/84/9f7f02288da1ffb31405c1be07657afd1eecbcb4b64ee2817b6fe0f785fa/orjson-3.11.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:ab49d4b2a6a1d415ddb9f37a21e02e0d5dbfe10b7870b21bf779fc21e9156157", size = 408695, upload-time = "2026-02-02T15:38:18.831Z" },
- { url = "https://files.pythonhosted.org/packages/18/07/9dd2f0c0104f1a0295ffbe912bc8d63307a539b900dd9e2c48ef7810d971/orjson-3.11.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:390a1dce0c055ddf8adb6aa94a73b45a4a7d7177b5c584b8d1c1947f2ba60fb3", size = 144099, upload-time = "2026-02-02T15:38:20.28Z" },
- { url = "https://files.pythonhosted.org/packages/a5/66/857a8e4a3292e1f7b1b202883bcdeb43a91566cf59a93f97c53b44bd6801/orjson-3.11.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1eb80451a9c351a71dfaf5b7ccc13ad065405217726b59fdbeadbcc544f9d223", size = 134806, upload-time = "2026-02-02T15:38:22.186Z" },
- { url = "https://files.pythonhosted.org/packages/0a/5b/6ebcf3defc1aab3a338ca777214966851e92efb1f30dc7fc8285216e6d1b/orjson-3.11.7-cp313-cp313-win32.whl", hash = "sha256:7477aa6a6ec6139c5cb1cc7b214643592169a5494d200397c7fc95d740d5fcf3", size = 127914, upload-time = "2026-02-02T15:38:23.511Z" },
- { url = "https://files.pythonhosted.org/packages/00/04/c6f72daca5092e3117840a1b1e88dfc809cc1470cf0734890d0366b684a1/orjson-3.11.7-cp313-cp313-win_amd64.whl", hash = "sha256:b9f95dcdea9d4f805daa9ddf02617a89e484c6985fa03055459f90e87d7a0757", size = 124986, upload-time = "2026-02-02T15:38:24.836Z" },
- { url = "https://files.pythonhosted.org/packages/03/ba/077a0f6f1085d6b806937246860fafbd5b17f3919c70ee3f3d8d9c713f38/orjson-3.11.7-cp313-cp313-win_arm64.whl", hash = "sha256:800988273a014a0541483dc81021247d7eacb0c845a9d1a34a422bc718f41539", size = 126045, upload-time = "2026-02-02T15:38:26.216Z" },
- { url = "https://files.pythonhosted.org/packages/e9/1e/745565dca749813db9a093c5ebc4bac1a9475c64d54b95654336ac3ed961/orjson-3.11.7-cp314-cp314-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:de0a37f21d0d364954ad5de1970491d7fbd0fb1ef7417d4d56a36dc01ba0c0a0", size = 228391, upload-time = "2026-02-02T15:38:27.757Z" },
- { url = "https://files.pythonhosted.org/packages/46/19/e40f6225da4d3aa0c8dc6e5219c5e87c2063a560fe0d72a88deb59776794/orjson-3.11.7-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:c2428d358d85e8da9d37cba18b8c4047c55222007a84f97156a5b22028dfbfc0", size = 125188, upload-time = "2026-02-02T15:38:29.241Z" },
- { url = "https://files.pythonhosted.org/packages/9d/7e/c4de2babef2c0817fd1f048fd176aa48c37bec8aef53d2fa932983032cce/orjson-3.11.7-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c4bc6c6ac52cdaa267552544c73e486fecbd710b7ac09bc024d5a78555a22f6", size = 128097, upload-time = "2026-02-02T15:38:30.618Z" },
- { url = "https://files.pythonhosted.org/packages/eb/74/233d360632bafd2197f217eee7fb9c9d0229eac0c18128aee5b35b0014fe/orjson-3.11.7-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd0d68edd7dfca1b2eca9361a44ac9f24b078de3481003159929a0573f21a6bf", size = 123364, upload-time = "2026-02-02T15:38:32.363Z" },
- { url = "https://files.pythonhosted.org/packages/79/51/af79504981dd31efe20a9e360eb49c15f06df2b40e7f25a0a52d9ae888e8/orjson-3.11.7-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:623ad1b9548ef63886319c16fa317848e465a21513b31a6ad7b57443c3e0dcf5", size = 129076, upload-time = "2026-02-02T15:38:33.68Z" },
- { url = "https://files.pythonhosted.org/packages/67/e2/da898eb68b72304f8de05ca6715870d09d603ee98d30a27e8a9629abc64b/orjson-3.11.7-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6e776b998ac37c0396093d10290e60283f59cfe0fc3fccbd0ccc4bd04dd19892", size = 141705, upload-time = "2026-02-02T15:38:34.989Z" },
- { url = "https://files.pythonhosted.org/packages/c5/89/15364d92acb3d903b029e28d834edb8780c2b97404cbf7929aa6b9abdb24/orjson-3.11.7-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:652c6c3af76716f4a9c290371ba2e390ede06f6603edb277b481daf37f6f464e", size = 130855, upload-time = "2026-02-02T15:38:36.379Z" },
- { url = "https://files.pythonhosted.org/packages/c2/8b/ecdad52d0b38d4b8f514be603e69ccd5eacf4e7241f972e37e79792212ec/orjson-3.11.7-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a56df3239294ea5964adf074c54bcc4f0ccd21636049a2cf3ca9cf03b5d03cf1", size = 133386, upload-time = "2026-02-02T15:38:37.704Z" },
- { url = "https://files.pythonhosted.org/packages/b9/0e/45e1dcf10e17d0924b7c9162f87ec7b4ca79e28a0548acf6a71788d3e108/orjson-3.11.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:bda117c4148e81f746655d5a3239ae9bd00cb7bc3ca178b5fc5a5997e9744183", size = 138295, upload-time = "2026-02-02T15:38:39.096Z" },
- { url = "https://files.pythonhosted.org/packages/63/d7/4d2e8b03561257af0450f2845b91fbd111d7e526ccdf737267108075e0ba/orjson-3.11.7-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:23d6c20517a97a9daf1d48b580fcdc6f0516c6f4b5038823426033690b4d2650", size = 408720, upload-time = "2026-02-02T15:38:40.634Z" },
- { url = "https://files.pythonhosted.org/packages/78/cf/d45343518282108b29c12a65892445fc51f9319dc3c552ceb51bb5905ed2/orjson-3.11.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:8ff206156006da5b847c9304b6308a01e8cdbc8cce824e2779a5ba71c3def141", size = 144152, upload-time = "2026-02-02T15:38:42.262Z" },
- { url = "https://files.pythonhosted.org/packages/a9/3a/d6001f51a7275aacd342e77b735c71fa04125a3f93c36fee4526bc8c654e/orjson-3.11.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:962d046ee1765f74a1da723f4b33e3b228fe3a48bd307acce5021dfefe0e29b2", size = 134814, upload-time = "2026-02-02T15:38:43.627Z" },
- { url = "https://files.pythonhosted.org/packages/1d/d3/f19b47ce16820cc2c480f7f1723e17f6d411b3a295c60c8ad3aa9ff1c96a/orjson-3.11.7-cp314-cp314-win32.whl", hash = "sha256:89e13dd3f89f1c38a9c9eba5fbf7cdc2d1feca82f5f290864b4b7a6aac704576", size = 127997, upload-time = "2026-02-02T15:38:45.06Z" },
- { url = "https://files.pythonhosted.org/packages/12/df/172771902943af54bf661a8d102bdf2e7f932127968080632bda6054b62c/orjson-3.11.7-cp314-cp314-win_amd64.whl", hash = "sha256:845c3e0d8ded9c9271cd79596b9b552448b885b97110f628fb687aee2eed11c1", size = 124985, upload-time = "2026-02-02T15:38:46.388Z" },
- { url = "https://files.pythonhosted.org/packages/6f/1c/f2a8d8a1b17514660a614ce5f7aac74b934e69f5abc2700cc7ced882a009/orjson-3.11.7-cp314-cp314-win_arm64.whl", hash = "sha256:4a2e9c5be347b937a2e0203866f12bba36082e89b402ddb9e927d5822e43088d", size = 126038, upload-time = "2026-02-02T15:38:47.703Z" },
+sdist = { url = "https://files.pythonhosted.org/packages/91/df/553f93ed38bf22f4b999d9be9c185adb558982214f33eae539d3b5cd0858/opentelemetry_semantic_conventions-0.60b1.tar.gz", hash = "sha256:87c228b5a0669b748c76d76df6c364c369c28f1c465e50f661e39737e84bc953", size = 137935, upload-time = "2025-12-11T13:32:50.487Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/7a/5e/5958555e09635d09b75de3c4f8b9cae7335ca545d77392ffe7331534c402/opentelemetry_semantic_conventions-0.60b1-py3-none-any.whl", hash = "sha256:9fa8c8b0c110da289809292b0591220d3a7b53c1526a23021e977d68597893fb", size = 219982, upload-time = "2025-12-11T13:32:36.955Z" },
]
[[package]]
@@ -4384,7 +4173,8 @@ name = "pandas"
version = "2.3.3"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
- "python_full_version < '3.11'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
]
dependencies = [
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
@@ -4448,21 +4238,25 @@ name = "pandas"
version = "3.0.1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
"python_full_version >= '3.14' and sys_platform == 'win32'",
"python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'win32'",
"python_full_version == '3.12.*' and sys_platform == 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'emscripten'",
"python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
"python_full_version == '3.11.*' and sys_platform == 'win32'",
"python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
]
dependencies = [
- { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+ { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
{ name = "python-dateutil", marker = "python_full_version >= '3.11'" },
{ name = "tzdata", marker = "(python_full_version >= '3.11' and sys_platform == 'emscripten') or (python_full_version >= '3.11' and sys_platform == 'win32')" },
]
@@ -4650,11 +4444,11 @@ wheels = [
[[package]]
name = "platformdirs"
-version = "4.9.4"
+version = "4.9.2"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/19/56/8d4c30c8a1d07013911a8fdbd8f89440ef9f08d07a1b50ab8ca8be5a20f9/platformdirs-4.9.4.tar.gz", hash = "sha256:1ec356301b7dc906d83f371c8f487070e99d3ccf9e501686456394622a01a934", size = 28737, upload-time = "2026-03-05T18:34:13.271Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/1b/04/fea538adf7dbbd6d186f551d595961e564a3b6715bdf276b477460858672/platformdirs-4.9.2.tar.gz", hash = "sha256:9a33809944b9db043ad67ca0db94b14bf452cc6aeaac46a88ea55b26e2e9d291", size = 28394, upload-time = "2026-02-16T03:56:10.574Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/63/d7/97f7e3a6abb67d8080dd406fd4df842c2be0efaf712d1c899c32a075027c/platformdirs-4.9.4-py3-none-any.whl", hash = "sha256:68a9a4619a666ea6439f2ff250c12a853cd1cbd5158d258bd824a7df6be2f868", size = 21216, upload-time = "2026-03-05T18:34:12.172Z" },
+ { url = "https://files.pythonhosted.org/packages/48/31/05e764397056194206169869b50cf2fee4dbbbc71b344705b9c0d878d4d8/platformdirs-4.9.2-py3-none-any.whl", hash = "sha256:9170634f126f8efdae22fb58ae8a0eaa86f38365bc57897a6c4f781d1f5875bd", size = 21168, upload-time = "2026-02-16T03:56:08.891Z" },
]
[[package]]
@@ -5209,14 +5003,11 @@ wheels = [
[[package]]
name = "pyjwt"
-version = "2.12.1"
+version = "2.11.0"
source = { registry = "https://pypi.org/simple" }
-dependencies = [
- { name = "typing-extensions", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/c2/27/a3b6e5bf6ff856d2509292e95c8f57f0df7017cf5394921fc4e4ef40308a/pyjwt-2.12.1.tar.gz", hash = "sha256:c74a7a2adf861c04d002db713dd85f84beb242228e671280bf709d765b03672b", size = 102564, upload-time = "2026-03-13T19:27:37.25Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/5c/5a/b46fa56bf322901eee5b0454a34343cdbdae202cd421775a8ee4e42fd519/pyjwt-2.11.0.tar.gz", hash = "sha256:35f95c1f0fbe5d5ba6e43f00271c275f7a1a4db1dab27bf708073b75318ea623", size = 98019, upload-time = "2026-01-30T19:59:55.694Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/e5/7a/8dd906bd22e79e47397a61742927f6747fe93242ef86645ee9092e610244/pyjwt-2.12.1-py3-none-any.whl", hash = "sha256:28ca37c070cad8ba8cd9790cd940535d40274d22f80ab87f3ac6a713e6e8454c", size = 29726, upload-time = "2026-03-13T19:27:35.677Z" },
+ { url = "https://files.pythonhosted.org/packages/6f/01/c26ce75ba460d5cd503da9e13b21a33804d38c2165dec7b716d06b13010c/pyjwt-2.11.0-py3-none-any.whl", hash = "sha256:94a6bde30eb5c8e04fee991062b534071fd1439ef58d2adc9ccb823e7bcd0469", size = 28224, upload-time = "2026-01-30T19:59:54.539Z" },
]
[package.optional-dependencies]
@@ -5231,8 +5022,8 @@ source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "astroid" },
{ name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "dill", version = "0.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'win32' or extra == 'extra-13-megatron-core-dev'" },
- { name = "dill", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'win32' and extra != 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "dill", version = "0.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" },
+ { name = "dill", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
{ name = "isort" },
{ name = "mccabe" },
{ name = "platformdirs" },
@@ -5373,30 +5164,17 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" },
]
-[[package]]
-name = "python-discovery"
-version = "1.1.3"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
- { name = "filelock" },
- { name = "platformdirs" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/d7/7e/9f3b0dd3a074a6c3e1e79f35e465b1f2ee4b262d619de00cfce523cc9b24/python_discovery-1.1.3.tar.gz", hash = "sha256:7acca36e818cd88e9b2ba03e045ad7e93e1713e29c6bbfba5d90202310b7baa5", size = 56945, upload-time = "2026-03-10T15:08:15.038Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/e7/80/73211fc5bfbfc562369b4aa61dc1e4bf07dc7b34df7b317e4539316b809c/python_discovery-1.1.3-py3-none-any.whl", hash = "sha256:90e795f0121bc84572e737c9aa9966311b9fde44ffb88a5953b3ec9b31c6945e", size = 31485, upload-time = "2026-03-10T15:08:13.06Z" },
-]
-
[[package]]
name = "python-gitlab"
-version = "8.1.0"
+version = "8.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "requests" },
{ name = "requests-toolbelt" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/20/1d/a62fea1f3312fd9e58af41466ae072796a09684dd0cd825cc042ba39488c/python_gitlab-8.1.0.tar.gz", hash = "sha256:660f15e3f889ec430797d260322bc61d90f8d90accfc10ba37593b11aed371bd", size = 401576, upload-time = "2026-02-28T01:26:32.757Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c4/68/02645bc9d71554e7a263b118e4e55dafe4c4735c1ba74f9712232ed84054/python_gitlab-8.0.0.tar.gz", hash = "sha256:03eae5a9d105448796e6c0e192d402c266057e75790cf4f42c143dddf91313ce", size = 401334, upload-time = "2026-01-28T01:22:27.005Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/79/d4/9848be62ef23fcac203f4386faf43a2cc13a4888447b3f5fbf7346f31374/python_gitlab-8.1.0-py3-none-any.whl", hash = "sha256:b1a59e81e5e0363185b446a707dc92c27ee8bf1fc14ce75ed8eafa58cbdce63a", size = 144498, upload-time = "2026-02-28T01:26:31.14Z" },
+ { url = "https://files.pythonhosted.org/packages/52/60/ba68e51e90a99b14af639463e5d617239029ec25927a0990ff28bd851916/python_gitlab-8.0.0-py3-none-any.whl", hash = "sha256:c635e6722c5710d35ddadfcf95c362b0aa8de11ab3972bc4f230ebd58a6c49ee", size = 144483, upload-time = "2026-01-28T01:22:25.772Z" },
]
[[package]]
@@ -5410,11 +5188,11 @@ wheels = [
[[package]]
name = "pytz"
-version = "2026.1.post1"
+version = "2025.2"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/56/db/b8721d71d945e6a8ac63c0fc900b2067181dbb50805958d4d4661cf7d277/pytz-2026.1.post1.tar.gz", hash = "sha256:3378dde6a0c3d26719182142c56e60c7f9af7e968076f31aae569d72a0358ee1", size = 321088, upload-time = "2026-03-03T07:47:50.683Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload-time = "2025-03-25T02:25:00.538Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/10/99/781fe0c827be2742bcc775efefccb3b048a3a9c6ce9aec0cbf4a101677e5/pytz-2026.1.post1-py2.py3-none-any.whl", hash = "sha256:f2fd16142fda348286a75e1a524be810bb05d444e5a081f37f7affc635035f7a", size = 510489, upload-time = "2026-03-03T07:47:49.167Z" },
+ { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" },
]
[[package]]
@@ -5587,10 +5365,10 @@ default = [
{ name = "grpcio" },
{ name = "opencensus" },
{ name = "opentelemetry-exporter-prometheus", version = "0.54b1", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" },
- { name = "opentelemetry-exporter-prometheus", version = "0.61b0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" },
+ { name = "opentelemetry-exporter-prometheus", version = "0.60b1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" },
{ name = "opentelemetry-proto" },
{ name = "opentelemetry-sdk", version = "1.33.1", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" },
- { name = "opentelemetry-sdk", version = "1.40.0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" },
+ { name = "opentelemetry-sdk", version = "1.39.1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts')" },
{ name = "prometheus-client" },
{ name = "py-spy" },
{ name = "pydantic" },
@@ -5615,123 +5393,123 @@ wheels = [
[[package]]
name = "regex"
-version = "2026.2.28"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/8b/71/41455aa99a5a5ac1eaf311f5d8efd9ce6433c03ac1e0962de163350d0d97/regex-2026.2.28.tar.gz", hash = "sha256:a729e47d418ea11d03469f321aaf67cdee8954cde3ff2cf8403ab87951ad10f2", size = 415184, upload-time = "2026-02-28T02:19:42.792Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/70/b8/845a927e078f5e5cc55d29f57becbfde0003d52806544531ab3f2da4503c/regex-2026.2.28-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fc48c500838be6882b32748f60a15229d2dea96e59ef341eaa96ec83538f498d", size = 488461, upload-time = "2026-02-28T02:15:48.405Z" },
- { url = "https://files.pythonhosted.org/packages/32/f9/8a0034716684e38a729210ded6222249f29978b24b684f448162ef21f204/regex-2026.2.28-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2afa673660928d0b63d84353c6c08a8a476ddfc4a47e11742949d182e6863ce8", size = 290774, upload-time = "2026-02-28T02:15:51.738Z" },
- { url = "https://files.pythonhosted.org/packages/a6/ba/b27feefffbb199528dd32667cd172ed484d9c197618c575f01217fbe6103/regex-2026.2.28-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7ab218076eb0944549e7fe74cf0e2b83a82edb27e81cc87411f76240865e04d5", size = 288737, upload-time = "2026-02-28T02:15:53.534Z" },
- { url = "https://files.pythonhosted.org/packages/18/c5/65379448ca3cbfe774fcc33774dc8295b1ee97dc3237ae3d3c7b27423c9d/regex-2026.2.28-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94d63db12e45a9b9f064bfe4800cefefc7e5f182052e4c1b774d46a40ab1d9bb", size = 782675, upload-time = "2026-02-28T02:15:55.488Z" },
- { url = "https://files.pythonhosted.org/packages/aa/30/6fa55bef48090f900fbd4649333791fc3e6467380b9e775e741beeb3231f/regex-2026.2.28-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:195237dc327858a7721bf8b0bbbef797554bc13563c3591e91cd0767bacbe359", size = 850514, upload-time = "2026-02-28T02:15:57.509Z" },
- { url = "https://files.pythonhosted.org/packages/a9/28/9ca180fb3787a54150209754ac06a42409913571fa94994f340b3bba4e1e/regex-2026.2.28-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b387a0d092dac157fb026d737dde35ff3e49ef27f285343e7c6401851239df27", size = 896612, upload-time = "2026-02-28T02:15:59.682Z" },
- { url = "https://files.pythonhosted.org/packages/46/b5/f30d7d3936d6deecc3ea7bea4f7d3c5ee5124e7c8de372226e436b330a55/regex-2026.2.28-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3935174fa4d9f70525a4367aaff3cb8bc0548129d114260c29d9dfa4a5b41692", size = 791691, upload-time = "2026-02-28T02:16:01.752Z" },
- { url = "https://files.pythonhosted.org/packages/f5/34/96631bcf446a56ba0b2a7f684358a76855dfe315b7c2f89b35388494ede0/regex-2026.2.28-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2b2b23587b26496ff5fd40df4278becdf386813ec00dc3533fa43a4cf0e2ad3c", size = 783111, upload-time = "2026-02-28T02:16:03.651Z" },
- { url = "https://files.pythonhosted.org/packages/39/54/f95cb7a85fe284d41cd2f3625e0f2ae30172b55dfd2af1d9b4eaef6259d7/regex-2026.2.28-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3b24bd7e9d85dc7c6a8bd2aa14ecd234274a0248335a02adeb25448aecdd420d", size = 767512, upload-time = "2026-02-28T02:16:05.616Z" },
- { url = "https://files.pythonhosted.org/packages/3d/af/a650f64a79c02a97f73f64d4e7fc4cc1984e64affab14075e7c1f9a2db34/regex-2026.2.28-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bd477d5f79920338107f04aa645f094032d9e3030cc55be581df3d1ef61aa318", size = 773920, upload-time = "2026-02-28T02:16:08.325Z" },
- { url = "https://files.pythonhosted.org/packages/72/f8/3f9c2c2af37aedb3f5a1e7227f81bea065028785260d9cacc488e43e6997/regex-2026.2.28-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:b49eb78048c6354f49e91e4b77da21257fecb92256b6d599ae44403cab30b05b", size = 846681, upload-time = "2026-02-28T02:16:10.381Z" },
- { url = "https://files.pythonhosted.org/packages/54/12/8db04a334571359f4d127d8f89550917ec6561a2fddfd69cd91402b47482/regex-2026.2.28-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:a25c7701e4f7a70021db9aaf4a4a0a67033c6318752146e03d1b94d32006217e", size = 755565, upload-time = "2026-02-28T02:16:11.972Z" },
- { url = "https://files.pythonhosted.org/packages/da/bc/91c22f384d79324121b134c267a86ca90d11f8016aafb1dc5bee05890ee3/regex-2026.2.28-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:9dd450db6458387167e033cfa80887a34c99c81d26da1bf8b0b41bf8c9cac88e", size = 835789, upload-time = "2026-02-28T02:16:14.036Z" },
- { url = "https://files.pythonhosted.org/packages/46/a7/4cc94fd3af01dcfdf5a9ed75c8e15fd80fcd62cc46da7592b1749e9c35db/regex-2026.2.28-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2954379dd20752e82d22accf3ff465311cbb2bac6c1f92c4afd400e1757f7451", size = 780094, upload-time = "2026-02-28T02:16:15.468Z" },
- { url = "https://files.pythonhosted.org/packages/3c/21/e5a38f420af3c77cab4a65f0c3a55ec02ac9babf04479cfd282d356988a6/regex-2026.2.28-cp310-cp310-win32.whl", hash = "sha256:1f8b17be5c27a684ea6759983c13506bd77bfc7c0347dff41b18ce5ddd2ee09a", size = 266025, upload-time = "2026-02-28T02:16:16.828Z" },
- { url = "https://files.pythonhosted.org/packages/4d/0a/205c4c1466a36e04d90afcd01d8908bac327673050c7fe316b2416d99d3d/regex-2026.2.28-cp310-cp310-win_amd64.whl", hash = "sha256:dd8847c4978bc3c7e6c826fb745f5570e518b8459ac2892151ce6627c7bc00d5", size = 277965, upload-time = "2026-02-28T02:16:18.752Z" },
- { url = "https://files.pythonhosted.org/packages/c3/4d/29b58172f954b6ec2c5ed28529a65e9026ab96b4b7016bcd3858f1c31d3c/regex-2026.2.28-cp310-cp310-win_arm64.whl", hash = "sha256:73cdcdbba8028167ea81490c7f45280113e41db2c7afb65a276f4711fa3bcbff", size = 270336, upload-time = "2026-02-28T02:16:20.735Z" },
- { url = "https://files.pythonhosted.org/packages/04/db/8cbfd0ba3f302f2d09dd0019a9fcab74b63fee77a76c937d0e33161fb8c1/regex-2026.2.28-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e621fb7c8dc147419b28e1702f58a0177ff8308a76fa295c71f3e7827849f5d9", size = 488462, upload-time = "2026-02-28T02:16:22.616Z" },
- { url = "https://files.pythonhosted.org/packages/5d/10/ccc22c52802223f2368731964ddd117799e1390ffc39dbb31634a83022ee/regex-2026.2.28-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0d5bef2031cbf38757a0b0bc4298bb4824b6332d28edc16b39247228fbdbad97", size = 290774, upload-time = "2026-02-28T02:16:23.993Z" },
- { url = "https://files.pythonhosted.org/packages/62/b9/6796b3bf3101e64117201aaa3a5a030ec677ecf34b3cd6141b5d5c6c67d5/regex-2026.2.28-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bcb399ed84eabf4282587ba151f2732ad8168e66f1d3f85b1d038868fe547703", size = 288724, upload-time = "2026-02-28T02:16:25.403Z" },
- { url = "https://files.pythonhosted.org/packages/9c/02/291c0ae3f3a10cea941d0f5366da1843d8d1fa8a25b0671e20a0e454bb38/regex-2026.2.28-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7c1b34dfa72f826f535b20712afa9bb3ba580020e834f3c69866c5bddbf10098", size = 791924, upload-time = "2026-02-28T02:16:26.863Z" },
- { url = "https://files.pythonhosted.org/packages/0f/57/f0235cc520d9672742196c5c15098f8f703f2758d48d5a7465a56333e496/regex-2026.2.28-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:851fa70df44325e1e4cdb79c5e676e91a78147b1b543db2aec8734d2add30ec2", size = 860095, upload-time = "2026-02-28T02:16:28.772Z" },
- { url = "https://files.pythonhosted.org/packages/b3/7c/393c94cbedda79a0f5f2435ebd01644aba0b338d327eb24b4aa5b8d6c07f/regex-2026.2.28-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:516604edd17b1c2c3e579cf4e9b25a53bf8fa6e7cedddf1127804d3e0140ca64", size = 906583, upload-time = "2026-02-28T02:16:30.977Z" },
- { url = "https://files.pythonhosted.org/packages/2c/73/a72820f47ca5abf2b5d911d0407ba5178fc52cf9780191ed3a54f5f419a2/regex-2026.2.28-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e7ce83654d1ab701cb619285a18a8e5a889c1216d746ddc710c914ca5fd71022", size = 800234, upload-time = "2026-02-28T02:16:32.55Z" },
- { url = "https://files.pythonhosted.org/packages/34/b3/6e6a4b7b31fa998c4cf159a12cbeaf356386fbd1a8be743b1e80a3da51e4/regex-2026.2.28-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f2791948f7c70bb9335a9102df45e93d428f4b8128020d85920223925d73b9e1", size = 772803, upload-time = "2026-02-28T02:16:34.029Z" },
- { url = "https://files.pythonhosted.org/packages/10/e7/5da0280c765d5a92af5e1cd324b3fe8464303189cbaa449de9a71910e273/regex-2026.2.28-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:03a83cc26aa2acda6b8b9dfe748cf9e84cbd390c424a1de34fdcef58961a297a", size = 781117, upload-time = "2026-02-28T02:16:36.253Z" },
- { url = "https://files.pythonhosted.org/packages/76/39/0b8d7efb256ae34e1b8157acc1afd8758048a1cf0196e1aec2e71fd99f4b/regex-2026.2.28-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:ec6f5674c5dc836994f50f1186dd1fafde4be0666aae201ae2fcc3d29d8adf27", size = 854224, upload-time = "2026-02-28T02:16:38.119Z" },
- { url = "https://files.pythonhosted.org/packages/21/ff/a96d483ebe8fe6d1c67907729202313895d8de8495569ec319c6f29d0438/regex-2026.2.28-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:50c2fc924749543e0eacc93ada6aeeb3ea5f6715825624baa0dccaec771668ae", size = 761898, upload-time = "2026-02-28T02:16:40.333Z" },
- { url = "https://files.pythonhosted.org/packages/89/bd/d4f2e75cb4a54b484e796017e37c0d09d8a0a837de43d17e238adf163f4e/regex-2026.2.28-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:ba55c50f408fb5c346a3a02d2ce0ebc839784e24f7c9684fde328ff063c3cdea", size = 844832, upload-time = "2026-02-28T02:16:41.875Z" },
- { url = "https://files.pythonhosted.org/packages/8a/a7/428a135cf5e15e4e11d1e696eb2bf968362f8ea8a5f237122e96bc2ae950/regex-2026.2.28-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:edb1b1b3a5576c56f08ac46f108c40333f222ebfd5cf63afdfa3aab0791ebe5b", size = 788347, upload-time = "2026-02-28T02:16:43.472Z" },
- { url = "https://files.pythonhosted.org/packages/a9/59/68691428851cf9c9c3707217ab1d9b47cfeec9d153a49919e6c368b9e926/regex-2026.2.28-cp311-cp311-win32.whl", hash = "sha256:948c12ef30ecedb128903c2c2678b339746eb7c689c5c21957c4a23950c96d15", size = 266033, upload-time = "2026-02-28T02:16:45.094Z" },
- { url = "https://files.pythonhosted.org/packages/42/8b/1483de1c57024e89296cbcceb9cccb3f625d416ddb46e570be185c9b05a9/regex-2026.2.28-cp311-cp311-win_amd64.whl", hash = "sha256:fd63453f10d29097cc3dc62d070746523973fb5aa1c66d25f8558bebd47fed61", size = 277978, upload-time = "2026-02-28T02:16:46.75Z" },
- { url = "https://files.pythonhosted.org/packages/a4/36/abec45dc6e7252e3dbc797120496e43bb5730a7abf0d9cb69340696a2f2d/regex-2026.2.28-cp311-cp311-win_arm64.whl", hash = "sha256:00f2b8d9615aa165fdff0a13f1a92049bfad555ee91e20d246a51aa0b556c60a", size = 270340, upload-time = "2026-02-28T02:16:48.626Z" },
- { url = "https://files.pythonhosted.org/packages/07/42/9061b03cf0fc4b5fa2c3984cbbaed54324377e440a5c5a29d29a72518d62/regex-2026.2.28-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fcf26c3c6d0da98fada8ae4ef0aa1c3405a431c0a77eb17306d38a89b02adcd7", size = 489574, upload-time = "2026-02-28T02:16:50.455Z" },
- { url = "https://files.pythonhosted.org/packages/77/83/0c8a5623a233015595e3da499c5a1c13720ac63c107897a6037bb97af248/regex-2026.2.28-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:02473c954af35dd2defeb07e44182f5705b30ea3f351a7cbffa9177beb14da5d", size = 291426, upload-time = "2026-02-28T02:16:52.52Z" },
- { url = "https://files.pythonhosted.org/packages/9e/06/3ef1ac6910dc3295ebd71b1f9bfa737e82cfead211a18b319d45f85ddd09/regex-2026.2.28-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9b65d33a17101569f86d9c5966a8b1d7fbf8afdda5a8aa219301b0a80f58cf7d", size = 289200, upload-time = "2026-02-28T02:16:54.08Z" },
- { url = "https://files.pythonhosted.org/packages/dd/c9/8cc8d850b35ab5650ff6756a1cb85286e2000b66c97520b29c1587455344/regex-2026.2.28-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e71dcecaa113eebcc96622c17692672c2d104b1d71ddf7adeda90da7ddeb26fc", size = 796765, upload-time = "2026-02-28T02:16:55.905Z" },
- { url = "https://files.pythonhosted.org/packages/e9/5d/57702597627fc23278ebf36fbb497ac91c0ce7fec89ac6c81e420ca3e38c/regex-2026.2.28-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:481df4623fa4969c8b11f3433ed7d5e3dc9cec0f008356c3212b3933fb77e3d8", size = 863093, upload-time = "2026-02-28T02:16:58.094Z" },
- { url = "https://files.pythonhosted.org/packages/02/6d/f3ecad537ca2811b4d26b54ca848cf70e04fcfc138667c146a9f3157779c/regex-2026.2.28-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:64e7c6ad614573e0640f271e811a408d79a9e1fe62a46adb602f598df42a818d", size = 909455, upload-time = "2026-02-28T02:17:00.918Z" },
- { url = "https://files.pythonhosted.org/packages/9e/40/bb226f203caa22c1043c1ca79b36340156eca0f6a6742b46c3bb222a3a57/regex-2026.2.28-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6b08a06976ff4fb0d83077022fde3eca06c55432bb997d8c0495b9a4e9872f4", size = 802037, upload-time = "2026-02-28T02:17:02.842Z" },
- { url = "https://files.pythonhosted.org/packages/44/7c/c6d91d8911ac6803b45ca968e8e500c46934e58c0903cbc6d760ee817a0a/regex-2026.2.28-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:864cdd1a2ef5716b0ab468af40139e62ede1b3a53386b375ec0786bb6783fc05", size = 775113, upload-time = "2026-02-28T02:17:04.506Z" },
- { url = "https://files.pythonhosted.org/packages/dc/8d/4a9368d168d47abd4158580b8c848709667b1cd293ff0c0c277279543bd0/regex-2026.2.28-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:511f7419f7afab475fd4d639d4aedfc54205bcb0800066753ef68a59f0f330b5", size = 784194, upload-time = "2026-02-28T02:17:06.888Z" },
- { url = "https://files.pythonhosted.org/packages/cc/bf/2c72ab5d8b7be462cb1651b5cc333da1d0068740342f350fcca3bca31947/regex-2026.2.28-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:b42f7466e32bf15a961cf09f35fa6323cc72e64d3d2c990b10de1274a5da0a59", size = 856846, upload-time = "2026-02-28T02:17:09.11Z" },
- { url = "https://files.pythonhosted.org/packages/7c/f4/6b65c979bb6d09f51bb2d2a7bc85de73c01ec73335d7ddd202dcb8cd1c8f/regex-2026.2.28-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:8710d61737b0c0ce6836b1da7109f20d495e49b3809f30e27e9560be67a257bf", size = 763516, upload-time = "2026-02-28T02:17:11.004Z" },
- { url = "https://files.pythonhosted.org/packages/8e/32/29ea5e27400ee86d2cc2b4e80aa059df04eaf78b4f0c18576ae077aeff68/regex-2026.2.28-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4390c365fd2d45278f45afd4673cb90f7285f5701607e3ad4274df08e36140ae", size = 849278, upload-time = "2026-02-28T02:17:12.693Z" },
- { url = "https://files.pythonhosted.org/packages/1d/91/3233d03b5f865111cd517e1c95ee8b43e8b428d61fa73764a80c9bb6f537/regex-2026.2.28-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cb3b1db8ff6c7b8bf838ab05583ea15230cb2f678e569ab0e3a24d1e8320940b", size = 790068, upload-time = "2026-02-28T02:17:14.9Z" },
- { url = "https://files.pythonhosted.org/packages/76/92/abc706c1fb03b4580a09645b206a3fc032f5a9f457bc1a8038ac555658ab/regex-2026.2.28-cp312-cp312-win32.whl", hash = "sha256:f8ed9a5d4612df9d4de15878f0bc6aa7a268afbe5af21a3fdd97fa19516e978c", size = 266416, upload-time = "2026-02-28T02:17:17.15Z" },
- { url = "https://files.pythonhosted.org/packages/fa/06/2a6f7dff190e5fa9df9fb4acf2fdf17a1aa0f7f54596cba8de608db56b3a/regex-2026.2.28-cp312-cp312-win_amd64.whl", hash = "sha256:01d65fd24206c8e1e97e2e31b286c59009636c022eb5d003f52760b0f42155d4", size = 277297, upload-time = "2026-02-28T02:17:18.723Z" },
- { url = "https://files.pythonhosted.org/packages/b7/f0/58a2484851fadf284458fdbd728f580d55c1abac059ae9f048c63b92f427/regex-2026.2.28-cp312-cp312-win_arm64.whl", hash = "sha256:c0b5ccbb8ffb433939d248707d4a8b31993cb76ab1a0187ca886bf50e96df952", size = 270408, upload-time = "2026-02-28T02:17:20.328Z" },
- { url = "https://files.pythonhosted.org/packages/87/f6/dc9ef48c61b79c8201585bf37fa70cd781977da86e466cd94e8e95d2443b/regex-2026.2.28-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6d63a07e5ec8ce7184452cb00c41c37b49e67dc4f73b2955b5b8e782ea970784", size = 489311, upload-time = "2026-02-28T02:17:22.591Z" },
- { url = "https://files.pythonhosted.org/packages/95/c8/c20390f2232d3f7956f420f4ef1852608ad57aa26c3dd78516cb9f3dc913/regex-2026.2.28-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e59bc8f30414d283ae8ee1617b13d8112e7135cb92830f0ec3688cb29152585a", size = 291285, upload-time = "2026-02-28T02:17:24.355Z" },
- { url = "https://files.pythonhosted.org/packages/d2/a6/ba1068a631ebd71a230e7d8013fcd284b7c89c35f46f34a7da02082141b1/regex-2026.2.28-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:de0cf053139f96219ccfabb4a8dd2d217c8c82cb206c91d9f109f3f552d6b43d", size = 289051, upload-time = "2026-02-28T02:17:26.722Z" },
- { url = "https://files.pythonhosted.org/packages/1d/1b/7cc3b7af4c244c204b7a80924bd3d85aecd9ba5bc82b485c5806ee8cda9e/regex-2026.2.28-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fb4db2f17e6484904f986c5a657cec85574c76b5c5e61c7aae9ffa1bc6224f95", size = 796842, upload-time = "2026-02-28T02:17:29.064Z" },
- { url = "https://files.pythonhosted.org/packages/24/87/26bd03efc60e0d772ac1e7b60a2e6325af98d974e2358f659c507d3c76db/regex-2026.2.28-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:52b017b35ac2214d0db5f4f90e303634dc44e4aba4bd6235a27f97ecbe5b0472", size = 863083, upload-time = "2026-02-28T02:17:31.363Z" },
- { url = "https://files.pythonhosted.org/packages/ae/54/aeaf4afb1aa0a65e40de52a61dc2ac5b00a83c6cb081c8a1d0dda74f3010/regex-2026.2.28-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:69fc560ccbf08a09dc9b52ab69cacfae51e0ed80dc5693078bdc97db2f91ae96", size = 909412, upload-time = "2026-02-28T02:17:33.248Z" },
- { url = "https://files.pythonhosted.org/packages/12/2f/049901def913954e640d199bbc6a7ca2902b6aeda0e5da9d17f114100ec2/regex-2026.2.28-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e61eea47230eba62a31f3e8a0e3164d0f37ef9f40529fb2c79361bc6b53d2a92", size = 802101, upload-time = "2026-02-28T02:17:35.053Z" },
- { url = "https://files.pythonhosted.org/packages/7d/a5/512fb9ff7f5b15ea204bb1967ebb649059446decacccb201381f9fa6aad4/regex-2026.2.28-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4f5c0b182ad4269e7381b7c27fdb0408399881f7a92a4624fd5487f2971dfc11", size = 775260, upload-time = "2026-02-28T02:17:37.692Z" },
- { url = "https://files.pythonhosted.org/packages/d1/a8/9a92935878aba19bd72706b9db5646a6f993d99b3f6ed42c02ec8beb1d61/regex-2026.2.28-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:96f6269a2882fbb0ee76967116b83679dc628e68eaea44e90884b8d53d833881", size = 784311, upload-time = "2026-02-28T02:17:39.855Z" },
- { url = "https://files.pythonhosted.org/packages/09/d3/fc51a8a738a49a6b6499626580554c9466d3ea561f2b72cfdc72e4149773/regex-2026.2.28-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b5acd4b6a95f37c3c3828e5d053a7d4edaedb85de551db0153754924cb7c83e3", size = 856876, upload-time = "2026-02-28T02:17:42.317Z" },
- { url = "https://files.pythonhosted.org/packages/08/b7/2e641f3d084b120ca4c52e8c762a78da0b32bf03ef546330db3e2635dc5f/regex-2026.2.28-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2234059cfe33d9813a3677ef7667999caea9eeaa83fef98eb6ce15c6cf9e0215", size = 763632, upload-time = "2026-02-28T02:17:45.073Z" },
- { url = "https://files.pythonhosted.org/packages/fe/6d/0009021d97e79ee99f3d8641f0a8d001eed23479ade4c3125a5480bf3e2d/regex-2026.2.28-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:c15af43c72a7fb0c97cbc66fa36a43546eddc5c06a662b64a0cbf30d6ac40944", size = 849320, upload-time = "2026-02-28T02:17:47.192Z" },
- { url = "https://files.pythonhosted.org/packages/05/7a/51cfbad5758f8edae430cb21961a9c8d04bce1dae4d2d18d4186eec7cfa1/regex-2026.2.28-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9185cc63359862a6e80fe97f696e04b0ad9a11c4ac0a4a927f979f611bfe3768", size = 790152, upload-time = "2026-02-28T02:17:49.067Z" },
- { url = "https://files.pythonhosted.org/packages/90/3d/a83e2b6b3daa142acb8c41d51de3876186307d5cb7490087031747662500/regex-2026.2.28-cp313-cp313-win32.whl", hash = "sha256:fb66e5245db9652abd7196ace599b04d9c0e4aa7c8f0e2803938377835780081", size = 266398, upload-time = "2026-02-28T02:17:50.744Z" },
- { url = "https://files.pythonhosted.org/packages/85/4f/16e9ebb1fe5425e11b9596c8d57bf8877dcb32391da0bfd33742e3290637/regex-2026.2.28-cp313-cp313-win_amd64.whl", hash = "sha256:71a911098be38c859ceb3f9a9ce43f4ed9f4c6720ad8684a066ea246b76ad9ff", size = 277282, upload-time = "2026-02-28T02:17:53.074Z" },
- { url = "https://files.pythonhosted.org/packages/07/b4/92851335332810c5a89723bf7a7e35c7209f90b7d4160024501717b28cc9/regex-2026.2.28-cp313-cp313-win_arm64.whl", hash = "sha256:39bb5727650b9a0275c6a6690f9bb3fe693a7e6cc5c3155b1240aedf8926423e", size = 270382, upload-time = "2026-02-28T02:17:54.888Z" },
- { url = "https://files.pythonhosted.org/packages/24/07/6c7e4cec1e585959e96cbc24299d97e4437a81173217af54f1804994e911/regex-2026.2.28-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:97054c55db06ab020342cc0d35d6f62a465fa7662871190175f1ad6c655c028f", size = 492541, upload-time = "2026-02-28T02:17:56.813Z" },
- { url = "https://files.pythonhosted.org/packages/7c/13/55eb22ada7f43d4f4bb3815b6132183ebc331c81bd496e2d1f3b8d862e0d/regex-2026.2.28-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0d25a10811de831c2baa6aef3c0be91622f44dd8d31dd12e69f6398efb15e48b", size = 292984, upload-time = "2026-02-28T02:17:58.538Z" },
- { url = "https://files.pythonhosted.org/packages/5b/11/c301f8cb29ce9644a5ef85104c59244e6e7e90994a0f458da4d39baa8e17/regex-2026.2.28-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d6cfe798d8da41bb1862ed6e0cba14003d387c3c0c4a5d45591076ae9f0ce2f8", size = 291509, upload-time = "2026-02-28T02:18:00.208Z" },
- { url = "https://files.pythonhosted.org/packages/b5/43/aabe384ec1994b91796e903582427bc2ffaed9c4103819ed3c16d8e749f3/regex-2026.2.28-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fd0ce43e71d825b7c0661f9c54d4d74bd97c56c3fd102a8985bcfea48236bacb", size = 809429, upload-time = "2026-02-28T02:18:02.328Z" },
- { url = "https://files.pythonhosted.org/packages/04/b8/8d2d987a816720c4f3109cee7c06a4b24ad0e02d4fc74919ab619e543737/regex-2026.2.28-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:00945d007fd74a9084d2ab79b695b595c6b7ba3698972fadd43e23230c6979c1", size = 869422, upload-time = "2026-02-28T02:18:04.23Z" },
- { url = "https://files.pythonhosted.org/packages/fc/ad/2c004509e763c0c3719f97c03eca26473bffb3868d54c5f280b8cd4f9e3d/regex-2026.2.28-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:bec23c11cbbf09a4df32fe50d57cbdd777bc442269b6e39a1775654f1c95dee2", size = 915175, upload-time = "2026-02-28T02:18:06.791Z" },
- { url = "https://files.pythonhosted.org/packages/55/c2/fd429066da487ef555a9da73bf214894aec77fc8c66a261ee355a69871a8/regex-2026.2.28-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5cdcc17d935c8f9d3f4db5c2ebe2640c332e3822ad5d23c2f8e0228e6947943a", size = 812044, upload-time = "2026-02-28T02:18:08.736Z" },
- { url = "https://files.pythonhosted.org/packages/5b/ca/feedb7055c62a3f7f659971bf45f0e0a87544b6b0cf462884761453f97c5/regex-2026.2.28-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a448af01e3d8031c89c5d902040b124a5e921a25c4e5e07a861ca591ce429341", size = 782056, upload-time = "2026-02-28T02:18:10.777Z" },
- { url = "https://files.pythonhosted.org/packages/95/30/1aa959ed0d25c1dd7dd5047ea8ba482ceaef38ce363c401fd32a6b923e60/regex-2026.2.28-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:10d28e19bd4888e4abf43bd3925f3c134c52fdf7259219003588a42e24c2aa25", size = 798743, upload-time = "2026-02-28T02:18:13.025Z" },
- { url = "https://files.pythonhosted.org/packages/3b/1f/dadb9cf359004784051c897dcf4d5d79895f73a1bbb7b827abaa4814ae80/regex-2026.2.28-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:99985a2c277dcb9ccb63f937451af5d65177af1efdeb8173ac55b61095a0a05c", size = 864633, upload-time = "2026-02-28T02:18:16.84Z" },
- { url = "https://files.pythonhosted.org/packages/a7/f1/b9a25eb24e1cf79890f09e6ec971ee5b511519f1851de3453bc04f6c902b/regex-2026.2.28-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:e1e7b24cb3ae9953a560c563045d1ba56ee4749fbd05cf21ba571069bd7be81b", size = 770862, upload-time = "2026-02-28T02:18:18.892Z" },
- { url = "https://files.pythonhosted.org/packages/02/9a/c5cb10b7aa6f182f9247a30cc9527e326601f46f4df864ac6db588d11fcd/regex-2026.2.28-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:d8511a01d0e4ee1992eb3ba19e09bc1866fe03f05129c3aec3fdc4cbc77aad3f", size = 854788, upload-time = "2026-02-28T02:18:21.475Z" },
- { url = "https://files.pythonhosted.org/packages/0a/50/414ba0731c4bd40b011fa4703b2cc86879ec060c64f2a906e65a56452589/regex-2026.2.28-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:aaffaecffcd2479ce87aa1e74076c221700b7c804e48e98e62500ee748f0f550", size = 800184, upload-time = "2026-02-28T02:18:23.492Z" },
- { url = "https://files.pythonhosted.org/packages/69/50/0c7290987f97e7e6830b0d853f69dc4dc5852c934aae63e7fdcd76b4c383/regex-2026.2.28-cp313-cp313t-win32.whl", hash = "sha256:ef77bdde9c9eba3f7fa5b58084b29bbcc74bcf55fdbeaa67c102a35b5bd7e7cc", size = 269137, upload-time = "2026-02-28T02:18:25.375Z" },
- { url = "https://files.pythonhosted.org/packages/68/80/ef26ff90e74ceb4051ad6efcbbb8a4be965184a57e879ebcbdef327d18fa/regex-2026.2.28-cp313-cp313t-win_amd64.whl", hash = "sha256:98adf340100cbe6fbaf8e6dc75e28f2c191b1be50ffefe292fb0e6f6eefdb0d8", size = 280682, upload-time = "2026-02-28T02:18:27.205Z" },
- { url = "https://files.pythonhosted.org/packages/69/8b/fbad9c52e83ffe8f97e3ed1aa0516e6dff6bb633a41da9e64645bc7efdc5/regex-2026.2.28-cp313-cp313t-win_arm64.whl", hash = "sha256:2fb950ac1d88e6b6a9414381f403797b236f9fa17e1eee07683af72b1634207b", size = 271735, upload-time = "2026-02-28T02:18:29.015Z" },
- { url = "https://files.pythonhosted.org/packages/cf/03/691015f7a7cb1ed6dacb2ea5de5682e4858e05a4c5506b2839cd533bbcd6/regex-2026.2.28-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:78454178c7df31372ea737996fb7f36b3c2c92cccc641d251e072478afb4babc", size = 489497, upload-time = "2026-02-28T02:18:30.889Z" },
- { url = "https://files.pythonhosted.org/packages/c6/ba/8db8fd19afcbfa0e1036eaa70c05f20ca8405817d4ad7a38a6b4c2f031ac/regex-2026.2.28-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:5d10303dd18cedfd4d095543998404df656088240bcfd3cd20a8f95b861f74bd", size = 291295, upload-time = "2026-02-28T02:18:33.426Z" },
- { url = "https://files.pythonhosted.org/packages/5a/79/9aa0caf089e8defef9b857b52fc53801f62ff868e19e5c83d4a96612eba1/regex-2026.2.28-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:19a9c9e0a8f24f39d575a6a854d516b48ffe4cbdcb9de55cb0570a032556ecff", size = 289275, upload-time = "2026-02-28T02:18:35.247Z" },
- { url = "https://files.pythonhosted.org/packages/eb/26/ee53117066a30ef9c883bf1127eece08308ccf8ccd45c45a966e7a665385/regex-2026.2.28-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09500be324f49b470d907b3ef8af9afe857f5cca486f853853f7945ddbf75911", size = 797176, upload-time = "2026-02-28T02:18:37.15Z" },
- { url = "https://files.pythonhosted.org/packages/05/1b/67fb0495a97259925f343ae78b5d24d4a6624356ae138b57f18bd43006e4/regex-2026.2.28-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fb1c4ff62277d87a7335f2c1ea4e0387b8f2b3ad88a64efd9943906aafad4f33", size = 863813, upload-time = "2026-02-28T02:18:39.478Z" },
- { url = "https://files.pythonhosted.org/packages/a0/1d/93ac9bbafc53618091c685c7ed40239a90bf9f2a82c983f0baa97cb7ae07/regex-2026.2.28-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b8b3f1be1738feadc69f62daa250c933e85c6f34fa378f54a7ff43807c1b9117", size = 908678, upload-time = "2026-02-28T02:18:41.619Z" },
- { url = "https://files.pythonhosted.org/packages/c7/7a/a8f5e0561702b25239846a16349feece59712ae20598ebb205580332a471/regex-2026.2.28-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dc8ed8c3f41c27acb83f7b6a9eb727a73fc6663441890c5cb3426a5f6a91ce7d", size = 801528, upload-time = "2026-02-28T02:18:43.624Z" },
- { url = "https://files.pythonhosted.org/packages/96/5d/ed6d4cbde80309854b1b9f42d9062fee38ade15f7eb4909f6ef2440403b5/regex-2026.2.28-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fa539be029844c0ce1114762d2952ab6cfdd7c7c9bd72e0db26b94c3c36dcc5a", size = 775373, upload-time = "2026-02-28T02:18:46.102Z" },
- { url = "https://files.pythonhosted.org/packages/6a/e9/6e53c34e8068b9deec3e87210086ecb5b9efebdefca6b0d3fa43d66dcecb/regex-2026.2.28-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7900157786428a79615a8264dac1f12c9b02957c473c8110c6b1f972dcecaddf", size = 784859, upload-time = "2026-02-28T02:18:48.269Z" },
- { url = "https://files.pythonhosted.org/packages/48/3c/736e1c7ca7f0dcd2ae33819888fdc69058a349b7e5e84bc3e2f296bbf794/regex-2026.2.28-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:0b1d2b07614d95fa2bf8a63fd1e98bd8fa2b4848dc91b1efbc8ba219fdd73952", size = 857813, upload-time = "2026-02-28T02:18:50.576Z" },
- { url = "https://files.pythonhosted.org/packages/6e/7c/48c4659ad9da61f58e79dbe8c05223e0006696b603c16eb6b5cbfbb52c27/regex-2026.2.28-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:b389c61aa28a79c2e0527ac36da579869c2e235a5b208a12c5b5318cda2501d8", size = 763705, upload-time = "2026-02-28T02:18:52.59Z" },
- { url = "https://files.pythonhosted.org/packages/cf/a1/bc1c261789283128165f71b71b4b221dd1b79c77023752a6074c102f18d8/regex-2026.2.28-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:f467cb602f03fbd1ab1908f68b53c649ce393fde056628dc8c7e634dab6bfc07", size = 848734, upload-time = "2026-02-28T02:18:54.595Z" },
- { url = "https://files.pythonhosted.org/packages/10/d8/979407faf1397036e25a5ae778157366a911c0f382c62501009f4957cf86/regex-2026.2.28-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e8c8cb2deba42f5ec1ede46374e990f8adc5e6456a57ac1a261b19be6f28e4e6", size = 789871, upload-time = "2026-02-28T02:18:57.34Z" },
- { url = "https://files.pythonhosted.org/packages/03/23/da716821277115fcb1f4e3de1e5dc5023a1e6533598c486abf5448612579/regex-2026.2.28-cp314-cp314-win32.whl", hash = "sha256:9036b400b20e4858d56d117108d7813ed07bb7803e3eed766675862131135ca6", size = 271825, upload-time = "2026-02-28T02:18:59.202Z" },
- { url = "https://files.pythonhosted.org/packages/91/ff/90696f535d978d5f16a52a419be2770a8d8a0e7e0cfecdbfc31313df7fab/regex-2026.2.28-cp314-cp314-win_amd64.whl", hash = "sha256:1d367257cd86c1cbb97ea94e77b373a0bbc2224976e247f173d19e8f18b4afa7", size = 280548, upload-time = "2026-02-28T02:19:01.049Z" },
- { url = "https://files.pythonhosted.org/packages/69/f9/5e1b5652fc0af3fcdf7677e7df3ad2a0d47d669b34ac29a63bb177bb731b/regex-2026.2.28-cp314-cp314-win_arm64.whl", hash = "sha256:5e68192bb3a1d6fb2836da24aa494e413ea65853a21505e142e5b1064a595f3d", size = 273444, upload-time = "2026-02-28T02:19:03.255Z" },
- { url = "https://files.pythonhosted.org/packages/d3/eb/8389f9e940ac89bcf58d185e230a677b4fd07c5f9b917603ad5c0f8fa8fe/regex-2026.2.28-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:a5dac14d0872eeb35260a8e30bac07ddf22adc1e3a0635b52b02e180d17c9c7e", size = 492546, upload-time = "2026-02-28T02:19:05.378Z" },
- { url = "https://files.pythonhosted.org/packages/7b/c7/09441d27ce2a6fa6a61ea3150ea4639c1dcda9b31b2ea07b80d6937b24dd/regex-2026.2.28-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:ec0c608b7a7465ffadb344ed7c987ff2f11ee03f6a130b569aa74d8a70e8333c", size = 292986, upload-time = "2026-02-28T02:19:07.24Z" },
- { url = "https://files.pythonhosted.org/packages/fb/69/4144b60ed7760a6bd235e4087041f487aa4aa62b45618ce018b0c14833ea/regex-2026.2.28-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c7815afb0ca45456613fdaf60ea9c993715511c8d53a83bc468305cbc0ee23c7", size = 291518, upload-time = "2026-02-28T02:19:09.698Z" },
- { url = "https://files.pythonhosted.org/packages/2d/be/77e5426cf5948c82f98c53582009ca9e94938c71f73a8918474f2e2990bb/regex-2026.2.28-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b059e71ec363968671693a78c5053bd9cb2fe410f9b8e4657e88377ebd603a2e", size = 809464, upload-time = "2026-02-28T02:19:12.494Z" },
- { url = "https://files.pythonhosted.org/packages/45/99/2c8c5ac90dc7d05c6e7d8e72c6a3599dc08cd577ac476898e91ca787d7f1/regex-2026.2.28-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b8cf76f1a29f0e99dcfd7aef1551a9827588aae5a737fe31442021165f1920dc", size = 869553, upload-time = "2026-02-28T02:19:15.151Z" },
- { url = "https://files.pythonhosted.org/packages/53/34/daa66a342f0271e7737003abf6c3097aa0498d58c668dbd88362ef94eb5d/regex-2026.2.28-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:180e08a435a0319e6a4821c3468da18dc7001987e1c17ae1335488dfe7518dd8", size = 915289, upload-time = "2026-02-28T02:19:17.331Z" },
- { url = "https://files.pythonhosted.org/packages/c5/c7/e22c2aaf0a12e7e22ab19b004bb78d32ca1ecc7ef245949935463c5567de/regex-2026.2.28-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e496956106fd59ba6322a8ea17141a27c5040e5ee8f9433ae92d4e5204462a0", size = 812156, upload-time = "2026-02-28T02:19:20.011Z" },
- { url = "https://files.pythonhosted.org/packages/7f/bb/2dc18c1efd9051cf389cd0d7a3a4d90f6804b9fff3a51b5dc3c85b935f71/regex-2026.2.28-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bba2b18d70eeb7b79950f12f633beeecd923f7c9ad6f6bae28e59b4cb3ab046b", size = 782215, upload-time = "2026-02-28T02:19:22.047Z" },
- { url = "https://files.pythonhosted.org/packages/17/1e/9e4ec9b9013931faa32226ec4aa3c71fe664a6d8a2b91ac56442128b332f/regex-2026.2.28-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6db7bfae0f8a2793ff1f7021468ea55e2699d0790eb58ee6ab36ae43aa00bc5b", size = 798925, upload-time = "2026-02-28T02:19:24.173Z" },
- { url = "https://files.pythonhosted.org/packages/71/57/a505927e449a9ccb41e2cc8d735e2abe3444b0213d1cf9cb364a8c1f2524/regex-2026.2.28-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:d0b02e8b7e5874b48ae0f077ecca61c1a6a9f9895e9c6dfb191b55b242862033", size = 864701, upload-time = "2026-02-28T02:19:26.376Z" },
- { url = "https://files.pythonhosted.org/packages/a6/ad/c62cb60cdd93e13eac5b3d9d6bd5d284225ed0e3329426f94d2552dd7cca/regex-2026.2.28-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:25b6eb660c5cf4b8c3407a1ed462abba26a926cc9965e164268a3267bcc06a43", size = 770899, upload-time = "2026-02-28T02:19:29.38Z" },
- { url = "https://files.pythonhosted.org/packages/3c/5a/874f861f5c3d5ab99633e8030dee1bc113db8e0be299d1f4b07f5b5ec349/regex-2026.2.28-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:5a932ea8ad5d0430351ff9c76c8db34db0d9f53c1d78f06022a21f4e290c5c18", size = 854727, upload-time = "2026-02-28T02:19:31.494Z" },
- { url = "https://files.pythonhosted.org/packages/6b/ca/d2c03b0efde47e13db895b975b2be6a73ed90b8ba963677927283d43bf74/regex-2026.2.28-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:1c2c95e1a2b0f89d01e821ff4de1be4b5d73d1f4b0bf679fa27c1ad8d2327f1a", size = 800366, upload-time = "2026-02-28T02:19:34.248Z" },
- { url = "https://files.pythonhosted.org/packages/14/bd/ee13b20b763b8989f7c75d592bfd5de37dc1181814a2a2747fedcf97e3ba/regex-2026.2.28-cp314-cp314t-win32.whl", hash = "sha256:bbb882061f742eb5d46f2f1bd5304055be0a66b783576de3d7eef1bed4778a6e", size = 274936, upload-time = "2026-02-28T02:19:36.313Z" },
- { url = "https://files.pythonhosted.org/packages/cb/e7/d8020e39414c93af7f0d8688eabcecece44abfd5ce314b21dfda0eebd3d8/regex-2026.2.28-cp314-cp314t-win_amd64.whl", hash = "sha256:6591f281cb44dc13de9585b552cec6fc6cf47fb2fe7a48892295ee9bc4a612f9", size = 284779, upload-time = "2026-02-28T02:19:38.625Z" },
- { url = "https://files.pythonhosted.org/packages/13/c0/ad225f4a405827486f1955283407cf758b6d2fb966712644c5f5aef33d1b/regex-2026.2.28-cp314-cp314t-win_arm64.whl", hash = "sha256:dee50f1be42222f89767b64b283283ef963189da0dda4a515aa54a5563c62dec", size = 275010, upload-time = "2026-02-28T02:19:40.65Z" },
+version = "2026.2.19"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ff/c0/d8079d4f6342e4cec5c3e7d7415b5cd3e633d5f4124f7a4626908dbe84c7/regex-2026.2.19.tar.gz", hash = "sha256:6fb8cb09b10e38f3ae17cc6dc04a1df77762bd0351b6ba9041438e7cc85ec310", size = 414973, upload-time = "2026-02-19T19:03:47.899Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/af/de/f10b4506acfd684de4e42b0aa56ccea1a778a18864da8f6d319a40591062/regex-2026.2.19-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f5a37a17d110f9d5357a43aa7e3507cb077bf3143d1c549a45c4649e90e40a70", size = 488369, upload-time = "2026-02-19T18:59:45.01Z" },
+ { url = "https://files.pythonhosted.org/packages/8b/2f/b4eaef1f0b4d0bf2a73eaf07c08f6c13422918a4180c9211ce0521746d0c/regex-2026.2.19-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:676c4e6847a83a1d5732b4ed553881ad36f0a8133627bb695a89ecf3571499d3", size = 290743, upload-time = "2026-02-19T18:59:48.527Z" },
+ { url = "https://files.pythonhosted.org/packages/76/7c/805413bd0a88d04688c0725c222cfb811bd54a2f571004c24199a1ae55d6/regex-2026.2.19-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:82336faeecac33297cd42857c3b36f12b91810e3fdd276befdd128f73a2b43fa", size = 288652, upload-time = "2026-02-19T18:59:50.2Z" },
+ { url = "https://files.pythonhosted.org/packages/08/ff/2c4cd530a878b1975398e76faef4285f11e7c9ccf1aaedfd528bfcc1f580/regex-2026.2.19-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:52136f5b71f095cb74b736cc3a1b578030dada2e361ef2f07ca582240b703946", size = 781759, upload-time = "2026-02-19T18:59:51.836Z" },
+ { url = "https://files.pythonhosted.org/packages/37/45/9608ab1b41f6740ff4076eabadde8e8b3f3400942b348ac41e8599ccc131/regex-2026.2.19-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4192464fe3e6cb0ef6751f7d3b16f886d8270d359ed1590dd555539d364f0ff7", size = 850947, upload-time = "2026-02-19T18:59:53.739Z" },
+ { url = "https://files.pythonhosted.org/packages/90/3a/66471b6c4f7cac17e14bf5300e46661bba2b17ffb0871bd2759e837a6f82/regex-2026.2.19-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e561dd47a85d2660d3d3af4e6cb2da825cf20f121e577147963f875b83d32786", size = 898794, upload-time = "2026-02-19T18:59:55.993Z" },
+ { url = "https://files.pythonhosted.org/packages/c2/d2/38c53929a5931f7398e5e49f5a5a3079cb2aba30119b4350608364cfad8c/regex-2026.2.19-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00ec994d7824bf01cd6c7d14c7a6a04d9aeaf7c42a2bc22d2359d715634d539b", size = 791922, upload-time = "2026-02-19T18:59:58.216Z" },
+ { url = "https://files.pythonhosted.org/packages/8b/bd/b046e065630fa25059d9c195b7b5308ea94da45eee65d40879772500f74c/regex-2026.2.19-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2cb00aabd96b345d56a8c2bc328c8d6c4d29935061e05078bf1f02302e12abf5", size = 783345, upload-time = "2026-02-19T18:59:59.948Z" },
+ { url = "https://files.pythonhosted.org/packages/d4/8f/045c643d2fa255a985e8f87d848e4be230b711a8935e4bdc58e60b8f7b84/regex-2026.2.19-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f374366ed35673ea81b86a8859c457d4fae6ba092b71024857e9e237410c7404", size = 768055, upload-time = "2026-02-19T19:00:01.65Z" },
+ { url = "https://files.pythonhosted.org/packages/72/9f/ab7ae9f5447559562f1a788bbc85c0e526528c5e6c20542d18e4afc86aad/regex-2026.2.19-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f9417fd853fcd00b7d55167e692966dd12d95ba1a88bf08a62002ccd85030790", size = 774955, upload-time = "2026-02-19T19:00:03.368Z" },
+ { url = "https://files.pythonhosted.org/packages/37/5c/f16fc23c56f60b6f4ff194604a6e53bb8aec7b6e8e4a23a482dee8d77235/regex-2026.2.19-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:12e86a01594031abf892686fcb309b041bf3de3d13d99eb7e2b02a8f3c687df1", size = 846010, upload-time = "2026-02-19T19:00:05.079Z" },
+ { url = "https://files.pythonhosted.org/packages/51/c8/6be4c854135d7c9f35d4deeafdaf124b039ecb4ffcaeb7ed0495ad2c97ca/regex-2026.2.19-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:79014115e6fdf18fd9b32e291d58181bf42d4298642beaa13fd73e69810e4cb6", size = 755938, upload-time = "2026-02-19T19:00:07.148Z" },
+ { url = "https://files.pythonhosted.org/packages/d6/8d/f683d49b9663a5324b95a328e69d397f6dade7cb84154eec116bf79fe150/regex-2026.2.19-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:31aefac2506967b7dd69af2c58eca3cc8b086d4110b66d6ac6e9026f0ee5b697", size = 835773, upload-time = "2026-02-19T19:00:08.939Z" },
+ { url = "https://files.pythonhosted.org/packages/16/cd/619224b90da09f167fe4497c350a0d0b30edc539ee9244bf93e604c073c3/regex-2026.2.19-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:49cef7bb2a491f91a8869c7cdd90babf0a417047ab0bf923cd038ed2eab2ccb8", size = 780075, upload-time = "2026-02-19T19:00:10.838Z" },
+ { url = "https://files.pythonhosted.org/packages/5b/88/19cfb0c262d6f9d722edef29157125418bf90eb3508186bf79335afeedae/regex-2026.2.19-cp310-cp310-win32.whl", hash = "sha256:3a039474986e7a314ace6efb9ce52f5da2bdb80ac4955358723d350ec85c32ad", size = 266004, upload-time = "2026-02-19T19:00:12.371Z" },
+ { url = "https://files.pythonhosted.org/packages/82/af/5b487e0287ef72545d7ae92edecdacbe3d44e531cac24fda7de5598ba8dd/regex-2026.2.19-cp310-cp310-win_amd64.whl", hash = "sha256:5b81ff4f9cad99f90c807a00c5882fbcda86d8b3edd94e709fb531fc52cb3d25", size = 277895, upload-time = "2026-02-19T19:00:13.75Z" },
+ { url = "https://files.pythonhosted.org/packages/4c/19/b6715a187ffca4d2979af92a46ce922445ba41f910bf187ccd666a2d52ef/regex-2026.2.19-cp310-cp310-win_arm64.whl", hash = "sha256:a032bc01a4bc73fc3cadba793fce28eb420da39338f47910c59ffcc11a5ba5ef", size = 270465, upload-time = "2026-02-19T19:00:15.127Z" },
+ { url = "https://files.pythonhosted.org/packages/6f/93/43f405a98f54cc59c786efb4fc0b644615ed2392fc89d57d30da11f35b5b/regex-2026.2.19-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:93b16a18cadb938f0f2306267161d57eb33081a861cee9ffcd71e60941eb5dfc", size = 488365, upload-time = "2026-02-19T19:00:17.857Z" },
+ { url = "https://files.pythonhosted.org/packages/66/46/da0efce22cd8f5ae28eeb25ac69703f49edcad3331ac22440776f4ea0867/regex-2026.2.19-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:78af1e499cab704131f6f4e2f155b7f54ce396ca2acb6ef21a49507e4752e0be", size = 290737, upload-time = "2026-02-19T19:00:19.869Z" },
+ { url = "https://files.pythonhosted.org/packages/fb/19/f735078448132c1c974974d30d5306337bc297fe6b6f126164bff72c1019/regex-2026.2.19-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:eb20c11aa4c3793c9ad04c19a972078cdadb261b8429380364be28e867a843f2", size = 288654, upload-time = "2026-02-19T19:00:21.307Z" },
+ { url = "https://files.pythonhosted.org/packages/e2/3e/6d7c24a2f423c03ad03e3fbddefa431057186ac1c4cb4fa98b03c7f39808/regex-2026.2.19-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:db5fd91eec71e7b08de10011a2223d0faa20448d4e1380b9daa179fa7bf58906", size = 793785, upload-time = "2026-02-19T19:00:22.926Z" },
+ { url = "https://files.pythonhosted.org/packages/67/32/fdb8107504b3122a79bde6705ac1f9d495ed1fe35b87d7cfc1864471999a/regex-2026.2.19-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fdbade8acba71bb45057c2b72f477f0b527c4895f9c83e6cfc30d4a006c21726", size = 860731, upload-time = "2026-02-19T19:00:25.196Z" },
+ { url = "https://files.pythonhosted.org/packages/9a/fd/cc8c6f05868defd840be6e75919b1c3f462357969ac2c2a0958363b4dc23/regex-2026.2.19-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:31a5f561eb111d6aae14202e7043fb0b406d3c8dddbbb9e60851725c9b38ab1d", size = 907350, upload-time = "2026-02-19T19:00:27.093Z" },
+ { url = "https://files.pythonhosted.org/packages/b5/1b/4590db9caa8db3d5a3fe31197c4e42c15aab3643b549ef6a454525fa3a61/regex-2026.2.19-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4584a3ee5f257b71e4b693cc9be3a5104249399f4116fe518c3f79b0c6fc7083", size = 800628, upload-time = "2026-02-19T19:00:29.392Z" },
+ { url = "https://files.pythonhosted.org/packages/76/05/513eaa5b96fa579fd0b813e19ec047baaaf573d7374ff010fa139b384bf7/regex-2026.2.19-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:196553ba2a2f47904e5dc272d948a746352e2644005627467e055be19d73b39e", size = 773711, upload-time = "2026-02-19T19:00:30.996Z" },
+ { url = "https://files.pythonhosted.org/packages/95/65/5aed06d8c54563d37fea496cf888be504879a3981a7c8e12c24b2c92c209/regex-2026.2.19-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0c10869d18abb759a3317c757746cc913d6324ce128b8bcec99350df10419f18", size = 783186, upload-time = "2026-02-19T19:00:34.598Z" },
+ { url = "https://files.pythonhosted.org/packages/2c/57/79a633ad90f2371b4ef9cd72ba3a69a1a67d0cfaab4fe6fa8586d46044ef/regex-2026.2.19-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e689fed279cbe797a6b570bd18ff535b284d057202692c73420cb93cca41aa32", size = 854854, upload-time = "2026-02-19T19:00:37.306Z" },
+ { url = "https://files.pythonhosted.org/packages/eb/2d/0f113d477d9e91ec4545ec36c82e58be25038d06788229c91ad52da2b7f5/regex-2026.2.19-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0782bd983f19ac7594039c9277cd6f75c89598c1d72f417e4d30d874105eb0c7", size = 762279, upload-time = "2026-02-19T19:00:39.793Z" },
+ { url = "https://files.pythonhosted.org/packages/39/cb/237e9fa4f61469fd4f037164dbe8e675a376c88cf73aaaa0aedfd305601c/regex-2026.2.19-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:dbb240c81cfed5d4a67cb86d7676d9f7ec9c3f186310bec37d8a1415210e111e", size = 846172, upload-time = "2026-02-19T19:00:42.134Z" },
+ { url = "https://files.pythonhosted.org/packages/ac/7c/104779c5915cc4eb557a33590f8a3f68089269c64287dd769afd76c7ce61/regex-2026.2.19-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:80d31c3f1fe7e4c6cd1831cd4478a0609903044dfcdc4660abfe6fb307add7f0", size = 789078, upload-time = "2026-02-19T19:00:43.908Z" },
+ { url = "https://files.pythonhosted.org/packages/a8/4a/eae4e88b1317fb2ff57794915e0099198f51e760f6280b320adfa0ad396d/regex-2026.2.19-cp311-cp311-win32.whl", hash = "sha256:66e6a43225ff1064f8926adbafe0922b370d381c3330edaf9891cade52daa790", size = 266013, upload-time = "2026-02-19T19:00:47.274Z" },
+ { url = "https://files.pythonhosted.org/packages/f9/29/ba89eb8fae79705e07ad1bd69e568f776159d2a8093c9dbc5303ee618298/regex-2026.2.19-cp311-cp311-win_amd64.whl", hash = "sha256:59a7a5216485a1896c5800e9feb8ff9213e11967b482633b6195d7da11450013", size = 277906, upload-time = "2026-02-19T19:00:49.011Z" },
+ { url = "https://files.pythonhosted.org/packages/e3/1a/042d8f04b28e318df92df69d8becb0f42221eb3dd4fe5e976522f4337c76/regex-2026.2.19-cp311-cp311-win_arm64.whl", hash = "sha256:ec661807ffc14c8d14bb0b8c1bb3d5906e476bc96f98b565b709d03962ee4dd4", size = 270463, upload-time = "2026-02-19T19:00:50.988Z" },
+ { url = "https://files.pythonhosted.org/packages/b3/73/13b39c7c9356f333e564ab4790b6cb0df125b8e64e8d6474e73da49b1955/regex-2026.2.19-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:c1665138776e4ac1aa75146669236f7a8a696433ec4e525abf092ca9189247cc", size = 489541, upload-time = "2026-02-19T19:00:52.728Z" },
+ { url = "https://files.pythonhosted.org/packages/15/77/fcc7bd9a67000d07fbcc11ed226077287a40d5c84544e62171d29d3ef59c/regex-2026.2.19-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d792b84709021945597e05656aac059526df4e0c9ef60a0eaebb306f8fafcaa8", size = 291414, upload-time = "2026-02-19T19:00:54.51Z" },
+ { url = "https://files.pythonhosted.org/packages/f9/87/3997fc72dc59233426ef2e18dfdd105bb123812fff740ee9cc348f1a3243/regex-2026.2.19-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:db970bcce4d63b37b3f9eb8c893f0db980bbf1d404a1d8d2b17aa8189de92c53", size = 289140, upload-time = "2026-02-19T19:00:56.841Z" },
+ { url = "https://files.pythonhosted.org/packages/f3/d0/b7dd3883ed1cff8ee0c0c9462d828aaf12be63bf5dc55453cbf423523b13/regex-2026.2.19-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:03d706fbe7dfec503c8c3cb76f9352b3e3b53b623672aa49f18a251a6c71b8e6", size = 798767, upload-time = "2026-02-19T19:00:59.014Z" },
+ { url = "https://files.pythonhosted.org/packages/4a/7e/8e2d09103832891b2b735a2515abf377db21144c6dd5ede1fb03c619bf09/regex-2026.2.19-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8dbff048c042beef60aa1848961384572c5afb9e8b290b0f1203a5c42cf5af65", size = 864436, upload-time = "2026-02-19T19:01:00.772Z" },
+ { url = "https://files.pythonhosted.org/packages/8a/2e/afea8d23a6db1f67f45e3a0da3057104ce32e154f57dd0c8997274d45fcd/regex-2026.2.19-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ccaaf9b907ea6b4223d5cbf5fa5dff5f33dc66f4907a25b967b8a81339a6e332", size = 912391, upload-time = "2026-02-19T19:01:02.865Z" },
+ { url = "https://files.pythonhosted.org/packages/59/3c/ea5a4687adaba5e125b9bd6190153d0037325a0ba3757cc1537cc2c8dd90/regex-2026.2.19-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:75472631eee7898e16a8a20998d15106cb31cfde21cdf96ab40b432a7082af06", size = 803702, upload-time = "2026-02-19T19:01:05.298Z" },
+ { url = "https://files.pythonhosted.org/packages/dc/c5/624a0705e8473a26488ec1a3a4e0b8763ecfc682a185c302dfec71daea35/regex-2026.2.19-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d89f85a5ccc0cec125c24be75610d433d65295827ebaf0d884cbe56df82d4774", size = 775980, upload-time = "2026-02-19T19:01:07.047Z" },
+ { url = "https://files.pythonhosted.org/packages/4d/4b/ed776642533232b5599b7c1f9d817fe11faf597e8a92b7a44b841daaae76/regex-2026.2.19-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0d9f81806abdca3234c3dd582b8a97492e93de3602c8772013cb4affa12d1668", size = 788122, upload-time = "2026-02-19T19:01:08.744Z" },
+ { url = "https://files.pythonhosted.org/packages/8c/58/e93e093921d13b9784b4f69896b6e2a9e09580a265c59d9eb95e87d288f2/regex-2026.2.19-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:9dadc10d1c2bbb1326e572a226d2ec56474ab8aab26fdb8cf19419b372c349a9", size = 858910, upload-time = "2026-02-19T19:01:10.488Z" },
+ { url = "https://files.pythonhosted.org/packages/85/77/ff1d25a0c56cd546e0455cbc93235beb33474899690e6a361fa6b52d265b/regex-2026.2.19-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:6bc25d7e15f80c9dc7853cbb490b91c1ec7310808b09d56bd278fe03d776f4f6", size = 764153, upload-time = "2026-02-19T19:01:12.156Z" },
+ { url = "https://files.pythonhosted.org/packages/cd/ef/8ec58df26d52d04443b1dc56f9be4b409f43ed5ae6c0248a287f52311fc4/regex-2026.2.19-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:965d59792f5037d9138da6fed50ba943162160443b43d4895b182551805aff9c", size = 850348, upload-time = "2026-02-19T19:01:14.147Z" },
+ { url = "https://files.pythonhosted.org/packages/f5/b3/c42fd5ed91639ce5a4225b9df909180fc95586db071f2bf7c68d2ccbfbe6/regex-2026.2.19-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:38d88c6ed4a09ed61403dbdf515d969ccba34669af3961ceb7311ecd0cef504a", size = 789977, upload-time = "2026-02-19T19:01:15.838Z" },
+ { url = "https://files.pythonhosted.org/packages/b6/22/bc3b58ebddbfd6ca5633e71fd41829ee931963aad1ebeec55aad0c23044e/regex-2026.2.19-cp312-cp312-win32.whl", hash = "sha256:5df947cabab4b643d4791af5e28aecf6bf62e6160e525651a12eba3d03755e6b", size = 266381, upload-time = "2026-02-19T19:01:17.952Z" },
+ { url = "https://files.pythonhosted.org/packages/fc/4a/6ff550b63e67603ee60e69dc6bd2d5694e85046a558f663b2434bdaeb285/regex-2026.2.19-cp312-cp312-win_amd64.whl", hash = "sha256:4146dc576ea99634ae9c15587d0c43273b4023a10702998edf0fa68ccb60237a", size = 277274, upload-time = "2026-02-19T19:01:19.826Z" },
+ { url = "https://files.pythonhosted.org/packages/cc/29/9ec48b679b1e87e7bc8517dff45351eab38f74fbbda1fbcf0e9e6d4e8174/regex-2026.2.19-cp312-cp312-win_arm64.whl", hash = "sha256:cdc0a80f679353bd68450d2a42996090c30b2e15ca90ded6156c31f1a3b63f3b", size = 270509, upload-time = "2026-02-19T19:01:22.075Z" },
+ { url = "https://files.pythonhosted.org/packages/d2/2d/a849835e76ac88fcf9e8784e642d3ea635d183c4112150ca91499d6703af/regex-2026.2.19-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8df08decd339e8b3f6a2eb5c05c687fe9d963ae91f352bc57beb05f5b2ac6879", size = 489329, upload-time = "2026-02-19T19:01:23.841Z" },
+ { url = "https://files.pythonhosted.org/packages/da/aa/78ff4666d3855490bae87845a5983485e765e1f970da20adffa2937b241d/regex-2026.2.19-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3aa0944f1dc6e92f91f3b306ba7f851e1009398c84bfd370633182ee4fc26a64", size = 291308, upload-time = "2026-02-19T19:01:25.605Z" },
+ { url = "https://files.pythonhosted.org/packages/cd/58/714384efcc07ae6beba528a541f6e99188c5cc1bc0295337f4e8a868296d/regex-2026.2.19-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c13228fbecb03eadbfd8f521732c5fda09ef761af02e920a3148e18ad0e09968", size = 289033, upload-time = "2026-02-19T19:01:27.243Z" },
+ { url = "https://files.pythonhosted.org/packages/75/ec/6438a9344d2869cf5265236a06af1ca6d885e5848b6561e10629bc8e5a11/regex-2026.2.19-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0d0e72703c60d68b18b27cde7cdb65ed2570ae29fb37231aa3076bfb6b1d1c13", size = 798798, upload-time = "2026-02-19T19:01:28.877Z" },
+ { url = "https://files.pythonhosted.org/packages/c2/be/b1ce2d395e3fd2ce5f2fde2522f76cade4297cfe84cd61990ff48308749c/regex-2026.2.19-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:46e69a4bf552e30e74a8aa73f473c87efcb7f6e8c8ece60d9fd7bf13d5c86f02", size = 864444, upload-time = "2026-02-19T19:01:30.933Z" },
+ { url = "https://files.pythonhosted.org/packages/d5/97/a3406460c504f7136f140d9461960c25f058b0240e4424d6fb73c7a067ab/regex-2026.2.19-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8edda06079bd770f7f0cf7f3bba1a0b447b96b4a543c91fe0c142d034c166161", size = 912633, upload-time = "2026-02-19T19:01:32.744Z" },
+ { url = "https://files.pythonhosted.org/packages/8b/d9/e5dbef95008d84e9af1dc0faabbc34a7fbc8daa05bc5807c5cf86c2bec49/regex-2026.2.19-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9cbc69eae834afbf634f7c902fc72ff3e993f1c699156dd1af1adab5d06b7fe7", size = 803718, upload-time = "2026-02-19T19:01:34.61Z" },
+ { url = "https://files.pythonhosted.org/packages/2f/e5/61d80132690a1ef8dc48e0f44248036877aebf94235d43f63a20d1598888/regex-2026.2.19-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bcf57d30659996ee5c7937999874504c11b5a068edc9515e6a59221cc2744dd1", size = 775975, upload-time = "2026-02-19T19:01:36.525Z" },
+ { url = "https://files.pythonhosted.org/packages/05/32/ae828b3b312c972cf228b634447de27237d593d61505e6ad84723f8eabba/regex-2026.2.19-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8e6e77cd92216eb489e21e5652a11b186afe9bdefca8a2db739fd6b205a9e0a4", size = 788129, upload-time = "2026-02-19T19:01:38.498Z" },
+ { url = "https://files.pythonhosted.org/packages/cb/25/d74f34676f22bec401eddf0e5e457296941e10cbb2a49a571ca7a2c16e5a/regex-2026.2.19-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b9ab8dec42afefa6314ea9b31b188259ffdd93f433d77cad454cd0b8d235ce1c", size = 858818, upload-time = "2026-02-19T19:01:40.409Z" },
+ { url = "https://files.pythonhosted.org/packages/1e/eb/0bc2b01a6b0b264e1406e5ef11cae3f634c3bd1a6e61206fd3227ce8e89c/regex-2026.2.19-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:294c0fb2e87c6bcc5f577c8f609210f5700b993151913352ed6c6af42f30f95f", size = 764186, upload-time = "2026-02-19T19:01:43.009Z" },
+ { url = "https://files.pythonhosted.org/packages/eb/37/5fe5a630d0d99ecf0c3570f8905dafbc160443a2d80181607770086c9812/regex-2026.2.19-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:c0924c64b082d4512b923ac016d6e1dcf647a3560b8a4c7e55cbbd13656cb4ed", size = 850363, upload-time = "2026-02-19T19:01:45.015Z" },
+ { url = "https://files.pythonhosted.org/packages/c3/45/ef68d805294b01ec030cfd388724ba76a5a21a67f32af05b17924520cb0b/regex-2026.2.19-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:790dbf87b0361606cb0d79b393c3e8f4436a14ee56568a7463014565d97da02a", size = 790026, upload-time = "2026-02-19T19:01:47.51Z" },
+ { url = "https://files.pythonhosted.org/packages/d6/3a/40d3b66923dfc5aeba182f194f0ca35d09afe8c031a193e6ae46971a0a0e/regex-2026.2.19-cp313-cp313-win32.whl", hash = "sha256:43cdde87006271be6963896ed816733b10967baaf0e271d529c82e93da66675b", size = 266372, upload-time = "2026-02-19T19:01:49.469Z" },
+ { url = "https://files.pythonhosted.org/packages/3d/f2/39082e8739bfd553497689e74f9d5e5bb531d6f8936d0b94f43e18f219c0/regex-2026.2.19-cp313-cp313-win_amd64.whl", hash = "sha256:127ea69273485348a126ebbf3d6052604d3c7da284f797bba781f364c0947d47", size = 277253, upload-time = "2026-02-19T19:01:51.208Z" },
+ { url = "https://files.pythonhosted.org/packages/c2/c2/852b9600d53fb47e47080c203e2cdc0ac7e84e37032a57e0eaa37446033a/regex-2026.2.19-cp313-cp313-win_arm64.whl", hash = "sha256:5e56c669535ac59cbf96ca1ece0ef26cb66809990cda4fa45e1e32c3b146599e", size = 270505, upload-time = "2026-02-19T19:01:52.865Z" },
+ { url = "https://files.pythonhosted.org/packages/a9/a2/e0b4575b93bc84db3b1fab24183e008691cd2db5c0ef14ed52681fbd94dd/regex-2026.2.19-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:93d881cab5afdc41a005dba1524a40947d6f7a525057aa64aaf16065cf62faa9", size = 492202, upload-time = "2026-02-19T19:01:54.816Z" },
+ { url = "https://files.pythonhosted.org/packages/24/b5/b84fec8cbb5f92a7eed2b6b5353a6a9eed9670fee31817c2da9eb85dc797/regex-2026.2.19-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:80caaa1ddcc942ec7be18427354f9d58a79cee82dea2a6b3d4fd83302e1240d7", size = 292884, upload-time = "2026-02-19T19:01:58.254Z" },
+ { url = "https://files.pythonhosted.org/packages/70/0c/fe89966dfae43da46f475362401f03e4d7dc3a3c955b54f632abc52669e0/regex-2026.2.19-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d793c5b4d2b4c668524cd1651404cfc798d40694c759aec997e196fe9729ec60", size = 291236, upload-time = "2026-02-19T19:01:59.966Z" },
+ { url = "https://files.pythonhosted.org/packages/f2/f7/bda2695134f3e63eb5cccbbf608c2a12aab93d261ff4e2fe49b47fabc948/regex-2026.2.19-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5100acb20648d9efd3f4e7e91f51187f95f22a741dcd719548a6cf4e1b34b3f", size = 807660, upload-time = "2026-02-19T19:02:01.632Z" },
+ { url = "https://files.pythonhosted.org/packages/11/56/6e3a4bf5e60d17326b7003d91bbde8938e439256dec211d835597a44972d/regex-2026.2.19-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5e3a31e94d10e52a896adaa3adf3621bd526ad2b45b8c2d23d1bbe74c7423007", size = 873585, upload-time = "2026-02-19T19:02:03.522Z" },
+ { url = "https://files.pythonhosted.org/packages/35/5e/c90c6aa4d1317cc11839359479cfdd2662608f339e84e81ba751c8a4e461/regex-2026.2.19-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8497421099b981f67c99eba4154cf0dfd8e47159431427a11cfb6487f7791d9e", size = 915243, upload-time = "2026-02-19T19:02:05.608Z" },
+ { url = "https://files.pythonhosted.org/packages/90/7c/981ea0694116793001496aaf9524e5c99e122ec3952d9e7f1878af3a6bf1/regex-2026.2.19-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e7a08622f7d51d7a068f7e4052a38739c412a3e74f55817073d2e2418149619", size = 812922, upload-time = "2026-02-19T19:02:08.115Z" },
+ { url = "https://files.pythonhosted.org/packages/2d/be/9eda82afa425370ffdb3fa9f3ea42450b9ae4da3ff0a4ec20466f69e371b/regex-2026.2.19-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8abe671cf0f15c26b1ad389bf4043b068ce7d3b1c5d9313e12895f57d6738555", size = 781318, upload-time = "2026-02-19T19:02:10.072Z" },
+ { url = "https://files.pythonhosted.org/packages/c6/d5/50f0bbe56a8199f60a7b6c714e06e54b76b33d31806a69d0703b23ce2a9e/regex-2026.2.19-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5a8f28dd32a4ce9c41758d43b5b9115c1c497b4b1f50c457602c1d571fa98ce1", size = 795649, upload-time = "2026-02-19T19:02:11.96Z" },
+ { url = "https://files.pythonhosted.org/packages/c5/09/d039f081e44a8b0134d0bb2dd805b0ddf390b69d0b58297ae098847c572f/regex-2026.2.19-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:654dc41a5ba9b8cc8432b3f1aa8906d8b45f3e9502442a07c2f27f6c63f85db5", size = 868844, upload-time = "2026-02-19T19:02:14.043Z" },
+ { url = "https://files.pythonhosted.org/packages/ef/53/e2903b79a19ec8557fe7cd21cd093956ff2dbc2e0e33969e3adbe5b184dd/regex-2026.2.19-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:4a02faea614e7fdd6ba8b3bec6c8e79529d356b100381cec76e638f45d12ca04", size = 770113, upload-time = "2026-02-19T19:02:16.161Z" },
+ { url = "https://files.pythonhosted.org/packages/8f/e2/784667767b55714ebb4e59bf106362327476b882c0b2f93c25e84cc99b1a/regex-2026.2.19-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:d96162140bb819814428800934c7b71b7bffe81fb6da2d6abc1dcca31741eca3", size = 854922, upload-time = "2026-02-19T19:02:18.155Z" },
+ { url = "https://files.pythonhosted.org/packages/59/78/9ef4356bd4aed752775bd18071034979b85f035fec51f3a4f9dea497a254/regex-2026.2.19-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c227f2922153ee42bbeb355fd6d009f8c81d9d7bdd666e2276ce41f53ed9a743", size = 799636, upload-time = "2026-02-19T19:02:20.04Z" },
+ { url = "https://files.pythonhosted.org/packages/cf/54/fcfc9287f20c5c9bd8db755aafe3e8cf4d99a6a3f1c7162ee182e0ca9374/regex-2026.2.19-cp313-cp313t-win32.whl", hash = "sha256:a178df8ec03011153fbcd2c70cb961bc98cbbd9694b28f706c318bee8927c3db", size = 268968, upload-time = "2026-02-19T19:02:22.816Z" },
+ { url = "https://files.pythonhosted.org/packages/1e/a0/ff24c6cb1273e42472706d277147fc38e1f9074a280fb6034b0fc9b69415/regex-2026.2.19-cp313-cp313t-win_amd64.whl", hash = "sha256:2c1693ca6f444d554aa246b592355b5cec030ace5a2729eae1b04ab6e853e768", size = 280390, upload-time = "2026-02-19T19:02:25.231Z" },
+ { url = "https://files.pythonhosted.org/packages/1a/b6/a3f6ad89d780ffdeebb4d5e2e3e30bd2ef1f70f6a94d1760e03dd1e12c60/regex-2026.2.19-cp313-cp313t-win_arm64.whl", hash = "sha256:c0761d7ae8d65773e01515ebb0b304df1bf37a0a79546caad9cbe79a42c12af7", size = 271643, upload-time = "2026-02-19T19:02:27.175Z" },
+ { url = "https://files.pythonhosted.org/packages/2d/e2/7ad4e76a6dddefc0d64dbe12a4d3ca3947a19ddc501f864a5df2a8222ddd/regex-2026.2.19-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:03d191a9bcf94d31af56d2575210cb0d0c6a054dbcad2ea9e00aa4c42903b919", size = 489306, upload-time = "2026-02-19T19:02:29.058Z" },
+ { url = "https://files.pythonhosted.org/packages/14/95/ee1736135733afbcf1846c58671046f99c4d5170102a150ebb3dd8d701d9/regex-2026.2.19-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:516ee067c6c721d0d0bfb80a2004edbd060fffd07e456d4e1669e38fe82f922e", size = 291218, upload-time = "2026-02-19T19:02:31.083Z" },
+ { url = "https://files.pythonhosted.org/packages/ef/08/180d1826c3d7065200a5168c6b993a44947395c7bb6e04b2c2a219c34225/regex-2026.2.19-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:997862c619994c4a356cb7c3592502cbd50c2ab98da5f61c5c871f10f22de7e5", size = 289097, upload-time = "2026-02-19T19:02:33.485Z" },
+ { url = "https://files.pythonhosted.org/packages/28/93/0651924c390c5740f5f896723f8ddd946a6c63083a7d8647231c343912ff/regex-2026.2.19-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02b9e1b8a7ebe2807cd7bbdf662510c8e43053a23262b9f46ad4fc2dfc9d204e", size = 799147, upload-time = "2026-02-19T19:02:35.669Z" },
+ { url = "https://files.pythonhosted.org/packages/a7/00/2078bd8bcd37d58a756989adbfd9f1d0151b7ca4085a9c2a07e917fbac61/regex-2026.2.19-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6c8fb3b19652e425ff24169dad3ee07f99afa7996caa9dfbb3a9106cd726f49a", size = 865239, upload-time = "2026-02-19T19:02:38.012Z" },
+ { url = "https://files.pythonhosted.org/packages/2a/13/75195161ec16936b35a365fa8c1dd2ab29fd910dd2587765062b174d8cfc/regex-2026.2.19-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:50f1ee9488dd7a9fda850ec7c68cad7a32fa49fd19733f5403a3f92b451dcf73", size = 911904, upload-time = "2026-02-19T19:02:40.737Z" },
+ { url = "https://files.pythonhosted.org/packages/96/72/ac42f6012179343d1c4bd0ffee8c948d841cb32ea188d37e96d80527fcc9/regex-2026.2.19-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ab780092b1424d13200aa5a62996e95f65ee3db8509be366437439cdc0af1a9f", size = 803518, upload-time = "2026-02-19T19:02:42.923Z" },
+ { url = "https://files.pythonhosted.org/packages/bc/d1/75a08e2269b007b9783f0f86aa64488e023141219cb5f14dc1e69cda56c6/regex-2026.2.19-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:17648e1a88e72d88641b12635e70e6c71c5136ba14edba29bf8fc6834005a265", size = 775866, upload-time = "2026-02-19T19:02:45.189Z" },
+ { url = "https://files.pythonhosted.org/packages/92/41/70e7d05faf6994c2ca7a9fcaa536da8f8e4031d45b0ec04b57040ede201f/regex-2026.2.19-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2f914ae8c804c8a8a562fe216100bc156bfb51338c1f8d55fe32cf407774359a", size = 788224, upload-time = "2026-02-19T19:02:47.804Z" },
+ { url = "https://files.pythonhosted.org/packages/c8/83/34a2dd601f9deb13c20545c674a55f4a05c90869ab73d985b74d639bac43/regex-2026.2.19-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c7e121a918bbee3f12ac300ce0a0d2f2c979cf208fb071ed8df5a6323281915c", size = 859682, upload-time = "2026-02-19T19:02:50.583Z" },
+ { url = "https://files.pythonhosted.org/packages/8e/30/136db9a09a7f222d6e48b806f3730e7af6499a8cad9c72ac0d49d52c746e/regex-2026.2.19-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2fedd459c791da24914ecc474feecd94cf7845efb262ac3134fe27cbd7eda799", size = 764223, upload-time = "2026-02-19T19:02:52.777Z" },
+ { url = "https://files.pythonhosted.org/packages/9e/ea/bb947743c78a16df481fa0635c50aa1a439bb80b0e6dc24cd4e49c716679/regex-2026.2.19-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:ea8dfc99689240e61fb21b5fc2828f68b90abf7777d057b62d3166b7c1543c4c", size = 850101, upload-time = "2026-02-19T19:02:55.87Z" },
+ { url = "https://files.pythonhosted.org/packages/25/27/e3bfe6e97a99f7393665926be02fef772da7f8aa59e50bc3134e4262a032/regex-2026.2.19-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9fff45852160960f29e184ec8a5be5ab4063cfd0b168d439d1fc4ac3744bf29e", size = 789904, upload-time = "2026-02-19T19:02:58.523Z" },
+ { url = "https://files.pythonhosted.org/packages/84/7b/7e2be6f00cea59d08761b027ad237002e90cac74b1607200ebaa2ba3d586/regex-2026.2.19-cp314-cp314-win32.whl", hash = "sha256:5390b130cce14a7d1db226a3896273b7b35be10af35e69f1cca843b6e5d2bb2d", size = 271784, upload-time = "2026-02-19T19:03:00.418Z" },
+ { url = "https://files.pythonhosted.org/packages/f7/f6/639911530335773e7ec60bcaa519557b719586024c1d7eaad1daf87b646b/regex-2026.2.19-cp314-cp314-win_amd64.whl", hash = "sha256:e581f75d5c0b15669139ca1c2d3e23a65bb90e3c06ba9d9ea194c377c726a904", size = 280506, upload-time = "2026-02-19T19:03:02.302Z" },
+ { url = "https://files.pythonhosted.org/packages/cd/ec/2582b56b4e036d46bb9b5d74a18548439ffa16c11cf59076419174d80f48/regex-2026.2.19-cp314-cp314-win_arm64.whl", hash = "sha256:7187fdee1be0896c1499a991e9bf7c78e4b56b7863e7405d7bb687888ac10c4b", size = 273557, upload-time = "2026-02-19T19:03:04.836Z" },
+ { url = "https://files.pythonhosted.org/packages/49/0b/f901cfeb4efd83e4f5c3e9f91a6de77e8e5ceb18555698aca3a27e215ed3/regex-2026.2.19-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:5ec1d7c080832fdd4e150c6f5621fe674c70c63b3ae5a4454cebd7796263b175", size = 492196, upload-time = "2026-02-19T19:03:08.188Z" },
+ { url = "https://files.pythonhosted.org/packages/94/0a/349b959e3da874e15eda853755567b4cde7e5309dbb1e07bfe910cfde452/regex-2026.2.19-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:8457c1bc10ee9b29cdfd897ccda41dce6bde0e9abd514bcfef7bcd05e254d411", size = 292878, upload-time = "2026-02-19T19:03:10.272Z" },
+ { url = "https://files.pythonhosted.org/packages/98/b0/9d81b3c2c5ddff428f8c506713737278979a2c476f6e3675a9c51da0c389/regex-2026.2.19-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cce8027010d1ffa3eb89a0b19621cdc78ae548ea2b49fea1f7bfb3ea77064c2b", size = 291235, upload-time = "2026-02-19T19:03:12.5Z" },
+ { url = "https://files.pythonhosted.org/packages/04/e7/be7818df8691dbe9508c381ea2cc4c1153e4fdb1c4b06388abeaa93bd712/regex-2026.2.19-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:11c138febb40546ff9e026dbbc41dc9fb8b29e61013fa5848ccfe045f5b23b83", size = 807893, upload-time = "2026-02-19T19:03:15.064Z" },
+ { url = "https://files.pythonhosted.org/packages/0c/b6/b898a8b983190cfa0276031c17beb73cfd1db07c03c8c37f606d80b655e2/regex-2026.2.19-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:74ff212aa61532246bb3036b3dfea62233414b0154b8bc3676975da78383cac3", size = 873696, upload-time = "2026-02-19T19:03:17.848Z" },
+ { url = "https://files.pythonhosted.org/packages/1a/98/126ba671d54f19080ec87cad228fb4f3cc387fff8c4a01cb4e93f4ff9d94/regex-2026.2.19-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d00c95a2b6bfeb3ea1cb68d1751b1dfce2b05adc2a72c488d77a780db06ab867", size = 915493, upload-time = "2026-02-19T19:03:20.343Z" },
+ { url = "https://files.pythonhosted.org/packages/b2/10/550c84a1a1a7371867fe8be2bea7df55e797cbca4709974811410e195c5d/regex-2026.2.19-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:311fcccb76af31be4c588d5a17f8f1a059ae8f4b097192896ebffc95612f223a", size = 813094, upload-time = "2026-02-19T19:03:23.287Z" },
+ { url = "https://files.pythonhosted.org/packages/29/fb/ba221d2fc76a27b6b7d7a60f73a7a6a7bac21c6ba95616a08be2bcb434b0/regex-2026.2.19-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:77cfd6b5e7c4e8bf7a39d243ea05882acf5e3c7002b0ef4756de6606893b0ecd", size = 781583, upload-time = "2026-02-19T19:03:26.872Z" },
+ { url = "https://files.pythonhosted.org/packages/26/f1/af79231301297c9e962679efc04a31361b58dc62dec1fc0cb4b8dd95956a/regex-2026.2.19-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6380f29ff212ec922b6efb56100c089251940e0526a0d05aa7c2d9b571ddf2fe", size = 795875, upload-time = "2026-02-19T19:03:29.223Z" },
+ { url = "https://files.pythonhosted.org/packages/a0/90/1e1d76cb0a2d0a4f38a039993e1c5cd971ae50435d751c5bae4f10e1c302/regex-2026.2.19-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:655f553a1fa3ab8a7fd570eca793408b8d26a80bfd89ed24d116baaf13a38969", size = 868916, upload-time = "2026-02-19T19:03:31.415Z" },
+ { url = "https://files.pythonhosted.org/packages/9a/67/a1c01da76dbcfed690855a284c665cc0a370e7d02d1bd635cf9ff7dd74b8/regex-2026.2.19-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:015088b8558502f1f0bccd58754835aa154a7a5b0bd9d4c9b7b96ff4ae9ba876", size = 770386, upload-time = "2026-02-19T19:03:33.972Z" },
+ { url = "https://files.pythonhosted.org/packages/49/6f/94842bf294f432ff3836bfd91032e2ecabea6d284227f12d1f935318c9c4/regex-2026.2.19-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:9e6693b8567a59459b5dda19104c4a4dbbd4a1c78833eacc758796f2cfef1854", size = 855007, upload-time = "2026-02-19T19:03:36.238Z" },
+ { url = "https://files.pythonhosted.org/packages/ff/93/393cd203ca0d1d368f05ce12d2c7e91a324bc93c240db2e6d5ada05835f4/regex-2026.2.19-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4071209fd4376ab5ceec72ad3507e9d3517c59e38a889079b98916477a871868", size = 799863, upload-time = "2026-02-19T19:03:38.497Z" },
+ { url = "https://files.pythonhosted.org/packages/43/d9/35afda99bd92bf1a5831e55a4936d37ea4bed6e34c176a3c2238317faf4f/regex-2026.2.19-cp314-cp314t-win32.whl", hash = "sha256:2905ff4a97fad42f2d0834d8b1ea3c2f856ec209837e458d71a061a7d05f9f01", size = 274742, upload-time = "2026-02-19T19:03:40.804Z" },
+ { url = "https://files.pythonhosted.org/packages/ae/42/7edc3344dcc87b698e9755f7f685d463852d481302539dae07135202d3ca/regex-2026.2.19-cp314-cp314t-win_amd64.whl", hash = "sha256:64128549b600987e0f335c2365879895f860a9161f283b14207c800a6ed623d3", size = 284443, upload-time = "2026-02-19T19:03:42.954Z" },
+ { url = "https://files.pythonhosted.org/packages/3a/45/affdf2d851b42adf3d13fc5b3b059372e9bd299371fd84cf5723c45871fa/regex-2026.2.19-cp314-cp314t-win_arm64.whl", hash = "sha256:a09ae430e94c049dc6957f6baa35ee3418a3a77f3c12b6e02883bd80a2b679b0", size = 274932, upload-time = "2026-02-19T19:03:45.488Z" },
]
[[package]]
@@ -5761,19 +5539,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 54481, upload-time = "2023-05-01T04:11:28.427Z" },
]
-[[package]]
-name = "responses"
-version = "0.18.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
- { name = "requests", marker = "python_full_version >= '3.14' and sys_platform == 'win32'" },
- { name = "urllib3", marker = "python_full_version >= '3.14' and sys_platform == 'win32'" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/03/a5/186653e51cb20fe3ac793403334d4d077fbb7bb18a9c5c2fce8304d5a2e2/responses-0.18.0.tar.gz", hash = "sha256:380cad4c1c1dc942e5e8a8eaae0b4d4edf708f4f010db8b7bcfafad1fcd254ff", size = 45885, upload-time = "2022-02-02T19:59:52.834Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/79/f3/2b3a6dc5986303b3dd1bbbcf482022acb2583c428cd23f0b6d37b1a1a519/responses-0.18.0-py3-none-any.whl", hash = "sha256:15c63ad16de13ee8e7182d99c9334f64fd81f1ee79f90748d527c28f7ca9dd51", size = 38735, upload-time = "2022-02-02T19:59:52.833Z" },
-]
-
[[package]]
name = "rich"
version = "14.3.3"
@@ -5919,6 +5684,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/b7/b95708304cd49b7b6f82fdd039f1748b66ec2b21d6a45180910802f1abf1/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ac37f9f516c51e5753f27dfdef11a88330f04de2d564be3991384b2f3535d02e", size = 562191, upload-time = "2025-11-30T20:24:36.853Z" },
]
+[[package]]
+name = "rsa"
+version = "4.9.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "pyasn1" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/da/8a/22b7beea3ee0d44b1916c0c1cb0ee3af23b700b6da9f04991899d0c555d4/rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75", size = 29034, upload-time = "2025-04-16T09:51:18.218Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" },
+]
+
[[package]]
name = "ruff"
version = "0.9.10"
@@ -5946,16 +5723,16 @@ wheels = [
[[package]]
name = "s3fs"
-version = "2026.2.0"
+version = "2025.10.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "aiobotocore" },
{ name = "aiohttp" },
- { name = "fsspec" },
+ { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" } },
]
-sdist = { url = "https://files.pythonhosted.org/packages/fa/be/392c8c5e0da9bfa139e41084690dd49a5e3e931099f78f52d3f6070105c6/s3fs-2026.2.0.tar.gz", hash = "sha256:91cb2a9f76e35643b76eeac3f47a6165172bb3def671f76b9111c8dd5779a2ac", size = 84152, upload-time = "2026-02-05T21:57:57.968Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/bb/ee/7cf7de3b17ef6db10b027cc9f8a1108ceb6333e267943e666a35882b1474/s3fs-2025.10.0.tar.gz", hash = "sha256:e8be6cddc77aceea1681ece0f472c3a7f8ef71a0d2acddb1cc92bb6afa3e9e4f", size = 80383, upload-time = "2025-10-30T15:06:04.647Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/57/e1/64c264db50b68de8a438b60ceeb921b2f22da3ebb7ad6255150225d0beac/s3fs-2026.2.0-py3-none-any.whl", hash = "sha256:65198835b86b1d5771112b0085d1da52a6ede36508b1aaa6cae2aedc765dfe10", size = 31328, upload-time = "2026-02-05T21:57:56.532Z" },
+ { url = "https://files.pythonhosted.org/packages/2d/fc/56cba14af8ad8fd020c85b6e44328520ac55939bb1f9d01444ad470504cb/s3fs-2025.10.0-py3-none-any.whl", hash = "sha256:da7ef25efc1541f5fca8e1116361e49ea1081f83f4e8001fbd77347c625da28a", size = 30357, upload-time = "2025-10-30T15:06:03.48Z" },
]
[[package]]
@@ -5989,7 +5766,8 @@ name = "scipy"
version = "1.15.3"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
- "python_full_version < '3.11'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
]
dependencies = [
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
@@ -6048,21 +5826,25 @@ name = "scipy"
version = "1.17.1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
"python_full_version >= '3.14' and sys_platform == 'win32'",
"python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'win32'",
"python_full_version == '3.12.*' and sys_platform == 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'emscripten'",
"python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
"python_full_version == '3.11.*' and sys_platform == 'win32'",
"python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
]
dependencies = [
- { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+ { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/7a/97/5a3609c4f8d58b039179648e62dd220f89864f56f7357f5d4f45c29eb2cc/scipy-1.17.1.tar.gz", hash = "sha256:95d8e012d8cb8816c226aef832200b1d45109ed4464303e997c5b13122b297c0", size = 30573822, upload-time = "2026-02-23T00:26:24.851Z" }
wheels = [
@@ -6194,15 +5976,15 @@ wheels = [
[[package]]
name = "sentry-sdk"
-version = "2.54.0"
+version = "2.53.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "certifi" },
{ name = "urllib3" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/c8/e9/2e3a46c304e7fa21eaa70612f60354e32699c7102eb961f67448e222ad7c/sentry_sdk-2.54.0.tar.gz", hash = "sha256:2620c2575128d009b11b20f7feb81e4e4e8ae08ec1d36cbc845705060b45cc1b", size = 413813, upload-time = "2026-03-02T15:12:41.355Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/d3/06/66c8b705179bc54087845f28fd1b72f83751b6e9a195628e2e9af9926505/sentry_sdk-2.53.0.tar.gz", hash = "sha256:6520ef2c4acd823f28efc55e43eb6ce2e6d9f954a95a3aa96b6fd14871e92b77", size = 412369, upload-time = "2026-02-16T11:11:14.743Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/53/39/be412cc86bc6247b8f69e9383d7950711bd86f8d0a4a4b0fe8fad685bc21/sentry_sdk-2.54.0-py2.py3-none-any.whl", hash = "sha256:fd74e0e281dcda63afff095d23ebcd6e97006102cdc8e78a29f19ecdf796a0de", size = 439198, upload-time = "2026-03-02T15:12:39.546Z" },
+ { url = "https://files.pythonhosted.org/packages/47/d4/2fdf854bc3b9c7f55219678f812600a20a138af2dd847d99004994eada8f/sentry_sdk-2.53.0-py2.py3-none-any.whl", hash = "sha256:46e1ed8d84355ae54406c924f6b290c3d61f4048625989a723fd622aab838899", size = 437908, upload-time = "2026-02-16T11:11:13.227Z" },
]
[[package]]
@@ -6234,11 +6016,11 @@ wheels = [
[[package]]
name = "slack-sdk"
-version = "3.41.0"
+version = "3.40.1"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/22/35/fc009118a13187dd9731657c60138e5a7c2dea88681a7f04dc406af5da7d/slack_sdk-3.41.0.tar.gz", hash = "sha256:eb61eb12a65bebeca9cb5d36b3f799e836ed2be21b456d15df2627cfe34076ca", size = 250568, upload-time = "2026-03-12T16:10:11.381Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/3a/18/784859b33a3f9c8cdaa1eda4115eb9fe72a0a37304718887d12991eeb2fd/slack_sdk-3.40.1.tar.gz", hash = "sha256:a215333bc251bc90abf5f5110899497bf61a3b5184b6d9ee35d73ebf09ec3fd0", size = 250379, upload-time = "2026-02-18T22:11:01.819Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/a1/df/2e4be347ff98281b505cc0ccf141408cdd25eb5ca9f3830deb361b2472d3/slack_sdk-3.41.0-py2.py3-none-any.whl", hash = "sha256:bb18dcdfff1413ec448e759cf807ec3324090993d8ab9111c74081623b692a89", size = 313885, upload-time = "2026-03-12T16:10:09.811Z" },
+ { url = "https://files.pythonhosted.org/packages/6e/e1/bb81f93c9f403e3b573c429dd4838ec9b44e4ef35f3b0759eb49557ab6e3/slack_sdk-3.40.1-py2.py3-none-any.whl", hash = "sha256:cd8902252979aa248092b0d77f3a9ea3cc605bc5d53663ad728e892e26e14a65", size = 313687, upload-time = "2026-02-18T22:11:00.027Z" },
]
[[package]]
@@ -6246,7 +6028,8 @@ name = "smart-open"
version = "7.5.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "wrapt" },
+ { name = "wrapt", version = "1.17.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" },
+ { name = "wrapt", version = "2.1.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
]
sdist = { url = "https://files.pythonhosted.org/packages/e8/be/a66598b305763861a9ab15ff0f2fbc44e47b1ce7a776797337a4eef37c66/smart_open-7.5.1.tar.gz", hash = "sha256:3f08e16827c4733699e6b2cc40328a3568f900cb12ad9a3ad233ba6c872d9fe7", size = 54034, upload-time = "2026-02-23T11:01:28.979Z" }
wheels = [
@@ -6255,11 +6038,11 @@ wheels = [
[[package]]
name = "smmap"
-version = "5.0.3"
+version = "5.0.2"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/1f/ea/49c993d6dfdd7338c9b1000a0f36817ed7ec84577ae2e52f890d1a4ff909/smmap-5.0.3.tar.gz", hash = "sha256:4d9debb8b99007ae47165abc08670bd74cb74b5227dda7f643eccc4e9eb5642c", size = 22506, upload-time = "2026-03-09T03:43:26.1Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/44/cd/a040c4b3119bbe532e5b0732286f805445375489fceaec1f48306068ee3b/smmap-5.0.2.tar.gz", hash = "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5", size = 22329, upload-time = "2025-01-02T07:14:40.909Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/c1/d4/59e74daffcb57a07668852eeeb6035af9f32cbfd7a1d2511f17d2fe6a738/smmap-5.0.3-py3-none-any.whl", hash = "sha256:c106e05d5a61449cf6ba9a1e650227ecfb141590d2a98412103ff35d89fc7b2f", size = 24390, upload-time = "2026-03-09T03:43:24.361Z" },
+ { url = "https://files.pythonhosted.org/packages/04/be/d09147ad1ec7934636ad912901c5fd7667e1c858e19d355237db0d0cd5e4/smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e", size = 24303, upload-time = "2025-01-02T07:14:38.724Z" },
]
[[package]]
@@ -6296,7 +6079,7 @@ source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cffi" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
]
sdist = { url = "https://files.pythonhosted.org/packages/e1/41/9b873a8c055582859b239be17902a85339bec6a30ad162f98c9b0288a2cc/soundfile-0.13.1.tar.gz", hash = "sha256:b2c68dab1e30297317080a5b43df57e302584c49e2942defdde0acccc53f0e5b", size = 46156, upload-time = "2025-01-25T09:17:04.831Z" }
wheels = [
@@ -6323,7 +6106,8 @@ name = "sphinx"
version = "8.1.3"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
- "python_full_version < '3.11'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
]
dependencies = [
{ name = "alabaster", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
@@ -6354,9 +6138,10 @@ name = "sphinx"
version = "9.0.4"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
"python_full_version == '3.11.*' and sys_platform == 'win32'",
"python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
]
dependencies = [
{ name = "alabaster", marker = "python_full_version == '3.11.*' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
@@ -6387,15 +6172,18 @@ name = "sphinx"
version = "9.1.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
"python_full_version >= '3.14' and sys_platform == 'win32'",
"python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'win32'",
"python_full_version == '3.12.*' and sys_platform == 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'emscripten'",
"python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
]
dependencies = [
{ name = "alabaster", marker = "python_full_version >= '3.12' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
@@ -6426,7 +6214,8 @@ name = "sphinx-autobuild"
version = "2024.10.3"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
- "python_full_version < '3.11'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
]
dependencies = [
{ name = "colorama", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
@@ -6446,18 +6235,22 @@ name = "sphinx-autobuild"
version = "2025.8.25"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
"python_full_version >= '3.14' and sys_platform == 'win32'",
"python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'win32'",
"python_full_version == '3.12.*' and sys_platform == 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'emscripten'",
"python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
"python_full_version == '3.11.*' and sys_platform == 'win32'",
"python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
]
dependencies = [
{ name = "colorama", marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
@@ -6591,11 +6384,11 @@ wheels = [
[[package]]
name = "tabulate"
-version = "0.10.0"
+version = "0.9.0"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/46/58/8c37dea7bbf769b20d58e7ace7e5edfe65b849442b00ffcdd56be88697c6/tabulate-0.10.0.tar.gz", hash = "sha256:e2cfde8f79420f6deeffdeda9aaec3b6bc5abce947655d17ac662b126e48a60d", size = 91754, upload-time = "2026-03-04T18:55:34.402Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/ec/fe/802052aecb21e3797b8f7902564ab6ea0d60ff8ca23952079064155d1ae1/tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c", size = 81090, upload-time = "2022-10-06T17:21:48.54Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/99/55/db07de81b5c630da5cbf5c7df646580ca26dfaefa593667fc6f2fe016d2e/tabulate-0.10.0-py3-none-any.whl", hash = "sha256:f0b0622e567335c8fabaaa659f1b33bcb6ddfe2e496071b743aa113f8774f2d3", size = 39814, upload-time = "2026-03-04T18:55:31.284Z" },
+ { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" },
]
[[package]]
@@ -6620,7 +6413,7 @@ dependencies = [
{ name = "grpcio" },
{ name = "markdown" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
{ name = "packaging" },
{ name = "pillow" },
{ name = "protobuf" },
@@ -6647,7 +6440,8 @@ name = "tensorstore"
version = "0.1.78"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
- "python_full_version < '3.11'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
]
dependencies = [
{ name = "ml-dtypes", marker = "python_full_version < '3.11'" },
@@ -6679,90 +6473,56 @@ wheels = [
[[package]]
name = "tensorstore"
-version = "0.1.82"
+version = "0.1.81"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
"python_full_version >= '3.14' and sys_platform == 'win32'",
"python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'win32'",
"python_full_version == '3.12.*' and sys_platform == 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'emscripten'",
"python_full_version == '3.12.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
"python_full_version == '3.11.*' and sys_platform == 'win32'",
"python_full_version == '3.11.*' and sys_platform == 'emscripten'",
- "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
]
dependencies = [
{ name = "ml-dtypes", marker = "python_full_version >= '3.11'" },
- { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/cd/9b/43aedb544937f214dd7c665a7edf1b8b74f2f55d53ebd351c0ce69acf81a/tensorstore-0.1.82.tar.gz", hash = "sha256:ccfceffb7611fc61330f6da24b8b0abd9251d480ac8a5bac5a1729f9ed0c3a9f", size = 7160364, upload-time = "2026-03-13T00:22:16.888Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/5b/d2/66513f1782dc52425bda0d5f7baae94ea639bbd226650ecb000223cc9359/tensorstore-0.1.82-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:6ae87ae9baf7593b5c8d09dbdf3ee6969068833a6fd85317b781a4cf7cb7e533", size = 16555813, upload-time = "2026-03-13T00:21:24.802Z" },
- { url = "https://files.pythonhosted.org/packages/04/4f/66a8af7dd6f5d8dabebe6edcdf0b87a06ac1f92318d972e9e6f5d3754b5d/tensorstore-0.1.82-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2471638a184473e384a6c3ffd98453b670a78372f2d3ed9707f27aebe5482c47", size = 14899141, upload-time = "2026-03-13T00:21:27.591Z" },
- { url = "https://files.pythonhosted.org/packages/36/50/7a9840eb6c9ec52348dcadf8ef2dca7b2cb7d3ae25bafb672a236fd885f4/tensorstore-0.1.82-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:38eed3828101622552e63564d7a3a10b0cecb05f61d40e0f236b95f622a60897", size = 19339518, upload-time = "2026-03-13T00:21:29.885Z" },
- { url = "https://files.pythonhosted.org/packages/1f/5f/85b42d1173b0ebbd1c11879f8ff60a72d7f5bbc111255d2c685a33813f2a/tensorstore-0.1.82-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aed5a6fc605e711c8a8dbd8ae73b919b8c6ca04ae94b0e0f6489fc54cdcab245", size = 20947623, upload-time = "2026-03-13T00:21:32.084Z" },
- { url = "https://files.pythonhosted.org/packages/11/23/dcbd9ab116d58d3a1ed9686102592c032b7ffd558aa8626fff1c18701ccd/tensorstore-0.1.82-cp311-cp311-win_amd64.whl", hash = "sha256:afb825258329241341aa3e64293b64562df7812a02d5f6c6e4c9f731d0e34b0e", size = 13387579, upload-time = "2026-03-13T00:21:34.393Z" },
- { url = "https://files.pythonhosted.org/packages/0d/c3/5ab0b99487b2596bdc0ebd3a569e50415949a63bad90b18e6476de91a7bb/tensorstore-0.1.82-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:f0ac091bd47ea6f051fe11230ad2642c254b46a8fabdd5184b0600556b5529ed", size = 16570668, upload-time = "2026-03-13T00:21:36.386Z" },
- { url = "https://files.pythonhosted.org/packages/aa/95/92b00a4b2e6192528a9c5bac9f53007acf4aa5d54943b9e114bedb72b2da/tensorstore-0.1.82-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8cae7d0c9b2fa0653f90b147daaf9ed04664cab7d297b9772efcfa088da26cab", size = 14904517, upload-time = "2026-03-13T00:21:38.464Z" },
- { url = "https://files.pythonhosted.org/packages/46/7e/c9c8ad65ee4015787e32d31bcf8278fcb27109e809f8334a64285bd73028/tensorstore-0.1.82-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:34c491ea3c6c1904d4618bfe40020bd83aaeb19d52a266ea0f6919eb3fdc64c4", size = 19344428, upload-time = "2026-03-13T00:21:40.575Z" },
- { url = "https://files.pythonhosted.org/packages/f9/8a/590bb60a190d414abd2f83dd5b5148722d0c5d310a73e21b7a60ab98cf00/tensorstore-0.1.82-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d4182300d8ffa172e961e79c6bd89e38ce6bc5cd3abf1a7dacb22c2396ce40b7", size = 20964954, upload-time = "2026-03-13T00:21:42.515Z" },
- { url = "https://files.pythonhosted.org/packages/43/1c/34e6e97426e1718106e9cb74d3045992bdea3ee368f9ea4ea25b809bdba8/tensorstore-0.1.82-cp312-cp312-win_amd64.whl", hash = "sha256:6369809d01edf66cd487cde5c94f57138167c09561f3d906020fd53c72687f92", size = 13393361, upload-time = "2026-03-13T00:21:44.443Z" },
- { url = "https://files.pythonhosted.org/packages/58/d1/0b39f577f047340f7c466e7f929aba0b83d33a852952ae2dc4242c141ee6/tensorstore-0.1.82-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:9874349ff23a9e94df361e7a0378efd3f22a1b14c1bb4d00905e6477eb56b732", size = 16570239, upload-time = "2026-03-13T00:21:46.655Z" },
- { url = "https://files.pythonhosted.org/packages/be/41/d33bea17f9afaee862f268fc10c364997267ab29b9be2aeebe01105cb38b/tensorstore-0.1.82-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cb2b87e8df78dc629e09a001d19b64813f249f9c78e4ade76de26e18f68bc591", size = 14904654, upload-time = "2026-03-13T00:21:48.708Z" },
- { url = "https://files.pythonhosted.org/packages/16/b9/f9f3d00e84724968d1111bbcf5b9ec2797496f4849e86a4fdea7278f7b0d/tensorstore-0.1.82-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3e0d4f5240247986c66154c3e6c71deed5ef337ae5a52509b3125c8045717bb3", size = 19343727, upload-time = "2026-03-13T00:21:50.664Z" },
- { url = "https://files.pythonhosted.org/packages/3b/8f/570fb1069b9789b47376bdc8129371bd3dc62bbaf57054816527e79ff88a/tensorstore-0.1.82-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9f2c51d0c40a3a4e49590a1ec07494c518c46905c8f3ec1f5583120cfba3b2cf", size = 20964994, upload-time = "2026-03-13T00:21:52.918Z" },
- { url = "https://files.pythonhosted.org/packages/b2/d7/e1f168c6d82fd4af1acfade95f0ba4fe3593bac9e9a81ec074a80fe6258c/tensorstore-0.1.82-cp313-cp313-win_amd64.whl", hash = "sha256:82bbac5e11eeaa80ad1aedad1c7a8f1f4f39362c5f56906820b21fc34a497100", size = 13393826, upload-time = "2026-03-13T00:21:55.459Z" },
- { url = "https://files.pythonhosted.org/packages/95/c2/c75d42a223b5367ae0b7e10c847f6180139582cdaf51e30e28ad29721fd6/tensorstore-0.1.82-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:aa9d7b3f092a65b5573e6c9919bea1e16c909844f346c82407dc454a67a3fa11", size = 16574644, upload-time = "2026-03-13T00:21:57.382Z" },
- { url = "https://files.pythonhosted.org/packages/37/86/b2c19cc443c9fb69d682d0e5d67ac4c165edde4e4a92adbcaa6a1ec084ed/tensorstore-0.1.82-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:32f70923d3a5dd687ebfd4eb9d0892766bff9acef92a468852c1872e96bbb440", size = 14906299, upload-time = "2026-03-13T00:21:59.563Z" },
- { url = "https://files.pythonhosted.org/packages/3e/71/e88cd2e6859adbd414669827800b98db646ce5156b264a34f4f0fbeb488b/tensorstore-0.1.82-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:35607c5c0135d31c1b7bd821ad0446840161708a289df52cffc796d0321f3d60", size = 19345817, upload-time = "2026-03-13T00:22:01.682Z" },
- { url = "https://files.pythonhosted.org/packages/65/e8/48dfcf42c344980564e01052900fb2a3a28d90d515133fe69bdded70df6c/tensorstore-0.1.82-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:54d40a696115a8d13184920842a20c570bdb1cb3ba2352b05394814608290f6a", size = 20966508, upload-time = "2026-03-13T00:22:04.61Z" },
- { url = "https://files.pythonhosted.org/packages/16/65/2e465b576f61618a8a1a0e068811298a7338e9163713bcc24f5fe4abbf6c/tensorstore-0.1.82-cp314-cp314-win_amd64.whl", hash = "sha256:c7f63af7aabdf3a3e224d5b36c924bcb59ebc4fb8e485edc8fe13b8bf8b1ba32", size = 13785613, upload-time = "2026-03-13T00:22:06.643Z" },
- { url = "https://files.pythonhosted.org/packages/ee/e3/49a49e0b1605a58f31aed5ee3833b3a088984b16b5c3e7efaf34bd990ccb/tensorstore-0.1.82-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:69950d352327473014299a57f4c9fc7e0caa9c9e9100b3bc0a0c37f79c47fe6d", size = 16651920, upload-time = "2026-03-13T00:22:08.539Z" },
- { url = "https://files.pythonhosted.org/packages/77/69/bb0b929a2b1a1b72f15f6d9c5337b3ce0117de625f46345f56c815c106ee/tensorstore-0.1.82-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0224e20fad9ca9538c3e8ac4a32ef354acaa7ab2c130e4944c2eda58c3200742", size = 14988973, upload-time = "2026-03-13T00:22:10.493Z" },
- { url = "https://files.pythonhosted.org/packages/7e/e6/847146a4d802fd258eb032226ce3153167c4d0f44f4176633a77beb3af14/tensorstore-0.1.82-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c45dae1b34cad5bd56796e961c35ceb5a70617e4eb182faf73dd9cc4b21f3f87", size = 19365580, upload-time = "2026-03-13T00:22:12.679Z" },
- { url = "https://files.pythonhosted.org/packages/b3/06/46261b7ec4f6707edf9da8d4a2d68b4819b599e0f9b4906d5bfcec7fd5b2/tensorstore-0.1.82-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d8678ce55c4ca9daac815995d47aae6d3648c75dcdbb9f01326067ccc4de10a", size = 20981853, upload-time = "2026-03-13T00:22:14.817Z" },
-]
-
-[[package]]
-name = "tibs"
-version = "0.5.7"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/57/cd/6cf028decf1c2df4d26077dd5d0532587d93d4917233d5e004133166a940/tibs-0.5.7.tar.gz", hash = "sha256:173dfbecb2309edd9771f453580c88cf251e775613461566b23dbd756b3d54cb", size = 78255, upload-time = "2026-03-12T13:06:29.79Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/a5/4f/1149a5cf2c1be6862e1dcba0c22134c43c44f05ddeef4697ecf20067e508/tibs-0.5.7-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:01ea5258bdf942d21560dc07d532082cd04f07cfef65fedd58ae84f7d0d2562a", size = 401281, upload-time = "2026-03-12T13:06:25.78Z" },
- { url = "https://files.pythonhosted.org/packages/eb/af/59041580d51eb06077029cc64f0b2f9165b1c87075b7fe85f400e01ec6f9/tibs-0.5.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f5eea45851c960628a2bd29847765d55e19a687c5374456ad2c8cf6410eb1efa", size = 377945, upload-time = "2026-03-12T13:06:42.493Z" },
- { url = "https://files.pythonhosted.org/packages/ee/73/3b614d39221f02fca2f37dcdc1c65e25c963bf1da4b90ad9db393f9c130d/tibs-0.5.7-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a9feed5931b881809a950eca0e01e757113e2383a2af06a3e6982f110c869e2", size = 409620, upload-time = "2026-03-12T13:06:28.611Z" },
- { url = "https://files.pythonhosted.org/packages/16/a6/917ca6ca266135f0f52041700c4eb766097258dd987b81a630c061969db5/tibs-0.5.7-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:501728d096e10d9a165aa526743d47418a6bbfd7b084fa47ecb22be7641d3edb", size = 426017, upload-time = "2026-03-12T13:06:40.139Z" },
- { url = "https://files.pythonhosted.org/packages/ce/f6/3c795420f81bac44390d897712aebe186186d88ea5653e20f4ac5097b0b1/tibs-0.5.7-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:77103a9f1af72ac4cf5006828d0fb21578d19ce55fd990e9a1c8e46fd549561f", size = 449717, upload-time = "2026-03-12T13:06:45.416Z" },
- { url = "https://files.pythonhosted.org/packages/98/00/700b97377b55973ac233a280d6ff81c0187710c73a5ac3356ef79bf15eb2/tibs-0.5.7-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f95d5db62960205a1e9eba73ce67dc14e7366ae080cd4e5b6f005ebd90faf02", size = 453131, upload-time = "2026-03-12T13:06:46.623Z" },
- { url = "https://files.pythonhosted.org/packages/6d/41/38ccfe6fe48432ea20f6e6a49a42aeb9662042e5f4e8f9a4029047a6c44a/tibs-0.5.7-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ace018a057459e3dccd06a4aae1c5c8cd57e352b263dcef534ae39bf3e03b5cf", size = 419054, upload-time = "2026-03-12T13:06:27.25Z" },
- { url = "https://files.pythonhosted.org/packages/73/08/d9a66639564b92d5be07eb30bbd7a5b9052f338da09fd4ec3732346ff129/tibs-0.5.7-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2a618de62004d9217d2d2ab0f7f9bbdd098c12642dc01f07b3fb00f0b5f3131a", size = 448585, upload-time = "2026-03-12T13:06:33.306Z" },
- { url = "https://files.pythonhosted.org/packages/70/c1/24131985486d5bf878468226d9d0bdff5a0b04838b773a7339d22965f74e/tibs-0.5.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42725200f1b02687ed6e6a1c01e0ec150dc829d21d901ffc74cc0ac4d821f57f", size = 586259, upload-time = "2026-03-12T13:06:14.095Z" },
- { url = "https://files.pythonhosted.org/packages/02/0c/f74c6672d28054c55b6c593588792858be420dbf4b56d0adbf79fc1b7f8f/tibs-0.5.7-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:63255749f937c5e6fedcc7d54e7bd359aef711017e6855f373b0510a14ee2215", size = 701427, upload-time = "2026-03-12T13:06:37.234Z" },
- { url = "https://files.pythonhosted.org/packages/f4/bf/2c39836a5a1664cda596ba069d065322976245a5f86dab9f2b9a3eaff024/tibs-0.5.7-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:4b7510235379368b7523f624d46e0680f3706e3a3965877a6583cdcb598b8bac", size = 660754, upload-time = "2026-03-12T13:06:38.67Z" },
- { url = "https://files.pythonhosted.org/packages/1d/77/5a7a10001c38f4d1266d4f7a84fae27357c88834a0266bc401e37e1a7884/tibs-0.5.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:29480bf03e3372a5f9cc59ea0541f76f8efd696d4f0d214715e94247c342a037", size = 631034, upload-time = "2026-03-12T13:06:18.1Z" },
- { url = "https://files.pythonhosted.org/packages/0d/be/bb20938ab5d1e63ee4c5cf78be815ab2a8674e7aa0b2500db210f7db3e6d/tibs-0.5.7-cp314-cp314t-win32.whl", hash = "sha256:b9535dc7b7484904a58b51bd8e64da7efbf1d8466ff7e84ed1d78f4ddc561c99", size = 278952, upload-time = "2026-03-12T13:06:20.285Z" },
- { url = "https://files.pythonhosted.org/packages/d5/9a/e76888e8567dbe02a67a27d46e5acf06e3504df1268ebc6d8313942ec560/tibs-0.5.7-cp314-cp314t-win_amd64.whl", hash = "sha256:1906729038b85c3b4c040aa28a456d85bc976d0c5007177350eb73374ffa0fd0", size = 294069, upload-time = "2026-03-12T13:06:15.756Z" },
- { url = "https://files.pythonhosted.org/packages/10/37/f74a5f4288984cb909dbccd4cc254154f3ed97b16db1913406f1bd2914c9/tibs-0.5.7-cp314-cp314t-win_arm64.whl", hash = "sha256:7d6592ed93c6748acd39df484c1ee24d40ee247c2a20ca38ba03363506fd24f3", size = 278929, upload-time = "2026-03-12T13:06:43.962Z" },
- { url = "https://files.pythonhosted.org/packages/12/2d/de2c579d3eea0f18212b5b16decb04568b7a0ef912d00581a77492609d4e/tibs-0.5.7-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:859f05315ffb307d3474c505d694f3a547f00730a024c982f5f60316a5505b3c", size = 411352, upload-time = "2026-03-12T13:06:52.016Z" },
- { url = "https://files.pythonhosted.org/packages/74/71/4c21ccc5c2e1672f9cd91ed2c46604c250cffd9d386113772dded128b5cf/tibs-0.5.7-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:a883ca13a922a66b2c1326a9c188123a574741a72510a4bf52fd6f97db191e44", size = 383971, upload-time = "2026-03-12T13:06:50.143Z" },
- { url = "https://files.pythonhosted.org/packages/38/85/399940ac5393772792a209911a5efa42cf55cf621771e48b863211ac5a2a/tibs-0.5.7-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f70bd250769381c73110d6f24feaf8b6fcd44f680b3cb28a20ea06db3d04fb6f", size = 416256, upload-time = "2026-03-12T13:06:24.222Z" },
- { url = "https://files.pythonhosted.org/packages/02/94/481a73e74d398949f57d297b1809a10a951d252e7ec94b6715ed952ce500/tibs-0.5.7-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:76746f01b3db9dbd802f5e615f11f68df7a29ecef521b082dca53f3fa7d0084f", size = 428003, upload-time = "2026-03-12T13:06:23.064Z" },
- { url = "https://files.pythonhosted.org/packages/9b/e0/72db1760a7f7fec1d5f3690e0855fbbccbcf0a4a2fd318c9d71f3b33f3a7/tibs-0.5.7-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:847709c108800ad6a45efaf9a040628278956938a4897f7427a2587013dc3b98", size = 455589, upload-time = "2026-03-12T13:06:53.144Z" },
- { url = "https://files.pythonhosted.org/packages/3e/26/9cd3395914bf705d6ae1e9a6c323f727e9dc88fef716327ce7f486e0b55a/tibs-0.5.7-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ad61df93b50f875b277ab736c5d37b6bce56f9abce489a22f4e02d9daa2966e3", size = 459266, upload-time = "2026-03-12T13:06:21.678Z" },
- { url = "https://files.pythonhosted.org/packages/e9/3b/267f19a008d13c704dc0b044138a56239272a43531ccb05464129d0fbd01/tibs-0.5.7-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e13b9c7ff2604b0146772025e1ac6f85c8c625bf6ac73736ff671eaf357dda41", size = 423466, upload-time = "2026-03-12T13:06:41.212Z" },
- { url = "https://files.pythonhosted.org/packages/e5/d4/424ae3515e0e013ad83186074bf3beb53399b9052c00da703415ccc316ca/tibs-0.5.7-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0a7ce857ef05c59dc61abadc31c4b9b1e3c62f9e5fb29217988c308936aea71e", size = 452080, upload-time = "2026-03-12T13:06:32.112Z" },
- { url = "https://files.pythonhosted.org/packages/b0/15/ab80beba83a134745439d33763e1d3b017f994abeb9c309a3ac9fd94e90e/tibs-0.5.7-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1d5521cc6768bfa6282a0c591ba06b079ab91b5c7d5696925ad2abac59779a54", size = 592311, upload-time = "2026-03-12T13:06:47.807Z" },
- { url = "https://files.pythonhosted.org/packages/4c/21/f5cf41c15431e63aeaefb494e714d48d9e9061b4e01fcc01d1987e2e5faa/tibs-0.5.7-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:477608f9b87e24a22ab6d50b81da04a5cb59bfa49598ff7ec5165035a18fb392", size = 703400, upload-time = "2026-03-12T13:06:16.968Z" },
- { url = "https://files.pythonhosted.org/packages/4e/ec/b3bdb7dcc3de8513c5678a685f4e25bb85ef48526d7d535ddc592f9e8602/tibs-0.5.7-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:ac0aa2aae38f7325c91c261ce1d18f769c4c7033c98d6ea3ea5534585cf16452", size = 664623, upload-time = "2026-03-12T13:06:48.894Z" },
- { url = "https://files.pythonhosted.org/packages/a4/71/7b85af3ad1b2cd9871c8f50ba0eb17e54e12481b467678535e58aced0d98/tibs-0.5.7-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:1b56583db148e5094d781c3d746815dbcbb6378c6f813c8ce291efd4ab21da8b", size = 635199, upload-time = "2026-03-12T13:06:34.798Z" },
- { url = "https://files.pythonhosted.org/packages/b9/63/60220fb502beb857306afd4a5bac4a8617ae496f3b1f4968d127380fdefe/tibs-0.5.7-cp38-abi3-win32.whl", hash = "sha256:d4f3ff613d486650816bc5516760c0382a2cc0ca8aeddd8914d011bc3b81d9a2", size = 288454, upload-time = "2026-03-12T13:06:30.978Z" },
- { url = "https://files.pythonhosted.org/packages/46/ab/aab78827ba7e0d65fe346b86d1d61e0792c38d5f9b7547e0f71b7027c835/tibs-0.5.7-cp38-abi3-win_amd64.whl", hash = "sha256:a61d36155f8ab8642e1b6744e13822f72050fc7ec4f86ec6965295afa04949e2", size = 304135, upload-time = "2026-03-12T13:06:35.884Z" },
- { url = "https://files.pythonhosted.org/packages/48/59/e9e6a610928a4bcbf04f0ac1436ee320aa8cbe95181f1aa32687c50e858b/tibs-0.5.7-cp38-abi3-win_arm64.whl", hash = "sha256:130bc68ff500fc8185677df7a97350b5d5339e6ba7e325bc3031337f6424ede7", size = 289272, upload-time = "2026-03-12T13:06:19.247Z" },
+ { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/43/f6/e2403fc05b97ba74ad408a98a42c288e6e1b8eacc23780c153b0e5166179/tensorstore-0.1.81.tar.gz", hash = "sha256:687546192ea6f6c8ae28d18f13103336f68017d928b9f5a00325e9b0548d9c25", size = 7120819, upload-time = "2026-02-06T18:56:12.535Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/cd/df/f472bd0dee801d7e33c53335ad0fcde9c71e5f9324241faa0a6b4be4270a/tensorstore-0.1.81-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:f64fb510f293079f9e5c63cb227e8a76904655a32912fc107c1e63bd8dc3e187", size = 16501390, upload-time = "2026-02-06T18:55:13.678Z" },
+ { url = "https://files.pythonhosted.org/packages/5a/93/5f40c51d7b15d3574b1788a251dd4e3abd0415dab71811e126d2da5e826b/tensorstore-0.1.81-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4282587598885ff447f08369ac9bb681a65e224888cfa8ef8f3dd63544759e6c", size = 14535592, upload-time = "2026-02-06T18:55:16.44Z" },
+ { url = "https://files.pythonhosted.org/packages/76/48/b7adcc8eca502ce8050c18cea066ca0c0122df7a686e10da6470e55456b4/tensorstore-0.1.81-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9b4ea06038f6912bb6ed8a89db0c31e4e3d1b2404f3365dc756e4bc42bd6a89c", size = 19038732, upload-time = "2026-02-06T18:55:18.924Z" },
+ { url = "https://files.pythonhosted.org/packages/40/b0/99294895b030bd7d9ebc06e7ed523d0c09ab65667e031f8a67923f398f86/tensorstore-0.1.81-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:51d59f7db9cdae02fce9d347300c0ccfb8265052945757e95592a265eb620b15", size = 21038447, upload-time = "2026-02-06T18:55:21.085Z" },
+ { url = "https://files.pythonhosted.org/packages/32/e6/1ce977baf09aa3889f10f04460b588a6c8876ea441e51090c671f0400a6f/tensorstore-0.1.81-cp311-cp311-win_amd64.whl", hash = "sha256:fdb9579a729cccc02127cab5abf26f57a0e27968ba65c9c548ad058f5a45417f", size = 13221673, upload-time = "2026-02-06T18:55:23.195Z" },
+ { url = "https://files.pythonhosted.org/packages/85/82/00037db699f74d792efe2696305ddd6932e04306899e3701824a7f7de961/tensorstore-0.1.81-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:7aefa1e3eadca804bce05215184c9cde29205ac2f3b443ca15a4e1846d31af4e", size = 16521245, upload-time = "2026-02-06T18:55:25.559Z" },
+ { url = "https://files.pythonhosted.org/packages/86/2e/1deca1b955cb959eec13fd342ffaa2fd84e4770b4e2bcb95a2f541875a52/tensorstore-0.1.81-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7e001d3edc6758eb5dc80556da9e945c1381f0529102fcc0301358ba6b9b70ed", size = 14543561, upload-time = "2026-02-06T18:55:27.624Z" },
+ { url = "https://files.pythonhosted.org/packages/6c/e4/b4343eae773f72a8777f82c5328191a06d8a5195e62105c14b7dcc49823f/tensorstore-0.1.81-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6c27e07f4e91e6dc6a0878e13e2c5931d1716196b67b0df927f2f571de2576e9", size = 19043982, upload-time = "2026-02-06T18:55:30.076Z" },
+ { url = "https://files.pythonhosted.org/packages/31/6c/d8c8508a9f4a83dc910d2365c484ba0debf5e531782065e3657fc8fc9b54/tensorstore-0.1.81-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fcb4786c4955e2d88d518b5b5a367427e3ad21d059cba366ad7aebf5fcc2302e", size = 21049171, upload-time = "2026-02-06T18:55:34.383Z" },
+ { url = "https://files.pythonhosted.org/packages/44/a9/c1a751e35a0fcff7f795398c4f98b6c8ea0f00fe7d7704f66a1e08d4352f/tensorstore-0.1.81-cp312-cp312-win_amd64.whl", hash = "sha256:b96cbf1ee74d9038762b2d81305ee1589ec89913a440df6cbd514bc5879655d2", size = 13226573, upload-time = "2026-02-06T18:55:36.463Z" },
+ { url = "https://files.pythonhosted.org/packages/06/c0/32f7d52bfcf1728f557cccb17ac85f57bcc3fa92f4034368d6e7d7d06406/tensorstore-0.1.81-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:7bb563ad4d4d6c4748d9fe4f01f639ddf4ffef83ac180fc3b6d73f46ad854e62", size = 16521316, upload-time = "2026-02-06T18:55:39.557Z" },
+ { url = "https://files.pythonhosted.org/packages/38/b9/06ffc44e38ca18aeb3973f6b709d4d2102e17a8d700c7c3e2af3f2830722/tensorstore-0.1.81-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2ff7e6c457596cf21f31c690e451fe634ac804fc98ff8131188e99d5ef7d29bc", size = 14543212, upload-time = "2026-02-06T18:55:42.246Z" },
+ { url = "https://files.pythonhosted.org/packages/00/01/3c27962f7258ad0bb552c3cd324fa2e01f746c8b6e81bd25d468f72204e8/tensorstore-0.1.81-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b218a6fe09c72c002f2c6480fc58b78cdbba8bb9c6f3a0d7dd1f70625cb37995", size = 19044489, upload-time = "2026-02-06T18:55:44.957Z" },
+ { url = "https://files.pythonhosted.org/packages/2c/ea/fe0f14a1da96d6e0aa6c24d6c31f3ce4b203f8e8a1a2e359489e52b33400/tensorstore-0.1.81-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f33e7c11035c14dad01aeba012051643110cbb95c239e512106fe1be692c98b6", size = 21052658, upload-time = "2026-02-06T18:55:47.138Z" },
+ { url = "https://files.pythonhosted.org/packages/e3/e2/cc189d799982f02c200b22405c4d3f28845df6321de2ac3a35ae087758ed/tensorstore-0.1.81-cp313-cp313-win_amd64.whl", hash = "sha256:b55126bcf084cc5fe0151bf465f3a5dedb5b5da0133d01227f75d0e71f9cfae5", size = 13226848, upload-time = "2026-02-06T18:55:49.631Z" },
+ { url = "https://files.pythonhosted.org/packages/89/b0/0ca436391f832fad365977623f3c08c4fbbf553fd9a112604aa106646654/tensorstore-0.1.81-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:a48c23e4df50681d8f4f365b08a0beb114ab210accbde9f34d37fd7b45c31005", size = 16525537, upload-time = "2026-02-06T18:55:51.708Z" },
+ { url = "https://files.pythonhosted.org/packages/8a/02/c10052b86cf8d47b4cf41e5f139b4003c69bb69e506759b0eb87b873d213/tensorstore-0.1.81-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0be0ce646263820f3d4c9ba738d8e9be7da241cbe093ca2fd02e25023344347c", size = 14547490, upload-time = "2026-02-06T18:55:53.899Z" },
+ { url = "https://files.pythonhosted.org/packages/01/d1/bd86c46367624522967e896ca45d77ba9085de3f15081fdad6576ba70aa9/tensorstore-0.1.81-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:93996e756dce82589f5a19e27b4e7c0b5b40221a7e41ddce46dc13d378dbd157", size = 19050938, upload-time = "2026-02-06T18:55:56.123Z" },
+ { url = "https://files.pythonhosted.org/packages/11/a2/59a8e9a33cd9e17461f918bda4a20712ed3c51c52e0e42b2f673441bc90d/tensorstore-0.1.81-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:444c088919a739c20ca1f87935d72de4fd87605eb2c0f093b8d49251b7884aef", size = 21055275, upload-time = "2026-02-06T18:55:58.259Z" },
+ { url = "https://files.pythonhosted.org/packages/c5/ec/2988f210729b523975b1bee030cabd64b256943c08463331598f1e03bd4f/tensorstore-0.1.81-cp314-cp314-win_amd64.whl", hash = "sha256:f7aa0a3a470c4d832faff7d77dd688b1d352b718d110c95ceba54ec637ca3ffa", size = 13614713, upload-time = "2026-02-06T18:56:00.291Z" },
+ { url = "https://files.pythonhosted.org/packages/ae/5d/60e990df3f1dc57c33644375a0eccb906a79fd8a5e2d81238f856c65ad7f/tensorstore-0.1.81-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:6c36d8a827120aa15e50ec5c36dd7e73978d86ba4f46d073fb648d8dda3948e9", size = 16605091, upload-time = "2026-02-06T18:56:02.807Z" },
+ { url = "https://files.pythonhosted.org/packages/85/22/f599576815227735d3e34f86f05a8b39d8b15fd979d0029383ebae23978d/tensorstore-0.1.81-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:3c31d831707c4ff3c6ecdcba129f7c39e982572837b2f93e02ccb83fc8581bca", size = 14631573, upload-time = "2026-02-06T18:56:04.892Z" },
+ { url = "https://files.pythonhosted.org/packages/cb/76/b5d0b424b7af057a3d4de3f312eba9ddf8a3c750a766b42e0b7f6c2ebef0/tensorstore-0.1.81-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9fba383f108d7450bf9a03487ac7fa3bb2c3080c91cee9d2da3bb217b560846b", size = 19065251, upload-time = "2026-02-06T18:56:06.972Z" },
+ { url = "https://files.pythonhosted.org/packages/54/6c/0f113eae73b1e8eb2f712cf5f1efd269452f0f0045158fae43ce7b4701b4/tensorstore-0.1.81-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f88c52f592e2982682045199cabf360462146749d48b7be2969cd640e877c6c3", size = 21066488, upload-time = "2026-02-06T18:56:10.236Z" },
]
[[package]]
@@ -6933,31 +6693,15 @@ name = "torch"
version = "2.10.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "cuda-bindings", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "filelock" },
- { name = "fsspec" },
- { name = "jinja2" },
- { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
- { name = "setuptools", marker = "python_full_version >= '3.12'" },
- { name = "sympy" },
+ { name = "filelock", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" },
+ { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" },
+ { name = "jinja2", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" },
+ { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform != 'linux') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "setuptools", marker = "python_full_version >= '3.12' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'" },
+ { name = "sympy", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" },
{ name = "triton", marker = "sys_platform == 'never'" },
- { name = "typing-extensions" },
+ { name = "typing-extensions", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/5b/30/bfebdd8ec77db9a79775121789992d6b3b75ee5494971294d7b4b7c999bc/torch-2.10.0-2-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:2b980edd8d7c0a68c4e951ee1856334a43193f98730d97408fbd148c1a933313", size = 79411457, upload-time = "2026-02-10T21:44:59.189Z" },
@@ -7009,7 +6753,8 @@ dependencies = [
{ name = "docker" },
{ name = "docstring-parser" },
{ name = "filelock" },
- { name = "fsspec" },
+ { name = "fsspec", version = "2025.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" },
+ { name = "fsspec", version = "2026.2.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
{ name = "importlib-metadata" },
{ name = "pyre-extensions" },
{ name = "pyyaml" },
@@ -7035,8 +6780,8 @@ wheels = [
[[package]]
name = "transformer-engine"
-version = "2.13.0+28777046"
-source = { git = "https://github.com/NVIDIA/TransformerEngine.git?rev=287770466f0f4433052260a765db5ff7b8be1320#287770466f0f4433052260a765db5ff7b8be1320" }
+version = "2.12.0+5671fd36"
+source = { git = "https://github.com/NVIDIA/TransformerEngine.git?rev=5671fd3675906cda1ade26c24a65d3dedd88eb89#5671fd3675906cda1ade26c24a65d3dedd88eb89" }
dependencies = [
{ name = "einops" },
{ name = "importlib-metadata" },
@@ -7056,7 +6801,7 @@ dependencies = [
{ name = "filelock" },
{ name = "huggingface-hub" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
{ name = "packaging" },
{ name = "pyyaml" },
{ name = "regex" },
@@ -7169,37 +6914,36 @@ wheels = [
[[package]]
name = "uvicorn"
-version = "0.42.0"
+version = "0.41.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "click" },
{ name = "h11" },
{ name = "typing-extensions", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/e3/ad/4a96c425be6fb67e0621e62d86c402b4a17ab2be7f7c055d9bd2f638b9e2/uvicorn-0.42.0.tar.gz", hash = "sha256:9b1f190ce15a2dd22e7758651d9b6d12df09a13d51ba5bf4fc33c383a48e1775", size = 85393, upload-time = "2026-03-16T06:19:50.077Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/32/ce/eeb58ae4ac36fe09e3842eb02e0eb676bf2c53ae062b98f1b2531673efdd/uvicorn-0.41.0.tar.gz", hash = "sha256:09d11cf7008da33113824ee5a1c6422d89fbc2ff476540d69a34c87fab8b571a", size = 82633, upload-time = "2026-02-16T23:07:24.1Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/0a/89/f8827ccff89c1586027a105e5630ff6139a64da2515e24dafe860bd9ae4d/uvicorn-0.42.0-py3-none-any.whl", hash = "sha256:96c30f5c7abe6f74ae8900a70e92b85ad6613b745d4879eb9b16ccad15645359", size = 68830, upload-time = "2026-03-16T06:19:48.325Z" },
+ { url = "https://files.pythonhosted.org/packages/83/e4/d04a086285c20886c0daad0e026f250869201013d18f81d9ff5eada73a88/uvicorn-0.41.0-py3-none-any.whl", hash = "sha256:29e35b1d2c36a04b9e180d4007ede3bcb32a85fbdfd6c6aeb3f26839de088187", size = 68783, upload-time = "2026-02-16T23:07:22.357Z" },
]
[[package]]
name = "virtualenv"
-version = "21.2.0"
+version = "20.39.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "distlib" },
{ name = "filelock" },
{ name = "platformdirs" },
- { name = "python-discovery" },
{ name = "typing-extensions", marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/aa/92/58199fe10049f9703c2666e809c4f686c54ef0a68b0f6afccf518c0b1eb9/virtualenv-21.2.0.tar.gz", hash = "sha256:1720dc3a62ef5b443092e3f499228599045d7fea4c79199770499df8becf9098", size = 5840618, upload-time = "2026-03-09T17:24:38.013Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/ed/54/809199edc537dbace273495ac0884d13df26436e910a5ed4d0ec0a69806b/virtualenv-20.39.0.tar.gz", hash = "sha256:a15f0cebd00d50074fd336a169d53422436a12dfe15149efec7072cfe817df8b", size = 5869141, upload-time = "2026-02-23T18:09:13.349Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/c6/59/7d02447a55b2e55755011a647479041bc92a82e143f96a8195cb33bd0a1c/virtualenv-21.2.0-py3-none-any.whl", hash = "sha256:1bd755b504931164a5a496d217c014d098426cddc79363ad66ac78125f9d908f", size = 5825084, upload-time = "2026-03-09T17:24:35.378Z" },
+ { url = "https://files.pythonhosted.org/packages/f7/b4/8268da45f26f4fe84f6eae80a6ca1485ffb490a926afecff75fc48f61979/virtualenv-20.39.0-py3-none-any.whl", hash = "sha256:44888bba3775990a152ea1f73f8e5f566d49f11bbd1de61d426fd7732770043e", size = 5839121, upload-time = "2026-02-23T18:09:11.173Z" },
]
[[package]]
name = "wandb"
-version = "0.25.1"
+version = "0.25.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "click" },
@@ -7213,17 +6957,17 @@ dependencies = [
{ name = "sentry-sdk" },
{ name = "typing-extensions" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/60/bb/eb579bf9abac70934a014a9d4e45346aab307994f3021d201bebe5fa25ec/wandb-0.25.1.tar.gz", hash = "sha256:b2a95cd777ecbe7499599a43158834983448a0048329bc7210ef46ca18d21994", size = 43983308, upload-time = "2026-03-10T23:51:44.227Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/fd/60/d94952549920469524b689479c864c692ca47eca4b8c2fe3389b64a58778/wandb-0.25.0.tar.gz", hash = "sha256:45840495a288e34245d69d07b5a0b449220fbc5b032e6b51c4f92ec9026d2ad1", size = 43951335, upload-time = "2026-02-13T00:17:45.515Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/e7/d8/873553b6818499d1b1de314067d528b892897baf0dc81fedc0e845abc2dd/wandb-0.25.1-py3-none-macosx_12_0_arm64.whl", hash = "sha256:9bb0679a3e2dcd96db9d9b6d3e17d046241d8d122974b24facb85cc93309a8c9", size = 23615900, upload-time = "2026-03-10T23:51:06.278Z" },
- { url = "https://files.pythonhosted.org/packages/71/ea/b131f319aaa5d0bf7572b6bfcff3dd89e1cf92b17eee443bbab71d12d74c/wandb-0.25.1-py3-none-macosx_12_0_x86_64.whl", hash = "sha256:0fb13ed18914027523e7b4fc20380c520e0d10da0ee452f924a13f84509fbe12", size = 25576144, upload-time = "2026-03-10T23:51:11.527Z" },
- { url = "https://files.pythonhosted.org/packages/70/5f/81508581f0bb77b0495665c1c78e77606a48e66e855ca71ba7c8ae29efa4/wandb-0.25.1-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:cc4521eb5223429ddab5e8eee9b42fdf4caabdf0bc4e0e809042720e5fbef0ed", size = 23070425, upload-time = "2026-03-10T23:51:15.71Z" },
- { url = "https://files.pythonhosted.org/packages/f2/c7/445155ef010e2e35d190797d7c36ff441e062a5b566a6da4778e22233395/wandb-0.25.1-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:e73b4c55b947edae349232d5845204d30fac88e18eb4ad1d4b96bf7cf898405a", size = 25628142, upload-time = "2026-03-10T23:51:19.326Z" },
- { url = "https://files.pythonhosted.org/packages/d5/63/f5c55ee00cf481ef1ccd3c385a0585ad52e7840d08419d4f82ddbeeea959/wandb-0.25.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:22b84065aa398e1624d2e5ad79e08bc4d2af41a6db61697b03b3aaba332977c6", size = 23123172, upload-time = "2026-03-10T23:51:23.418Z" },
- { url = "https://files.pythonhosted.org/packages/3e/d9/19eb7974c0e9253bcbaee655222c0f0e1a52e63e9479ee711b4208f8ac31/wandb-0.25.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:005c4c6b5126ef8f4b4110e5372d950918b00637d6dc4b615ad17445f9739478", size = 25714479, upload-time = "2026-03-10T23:51:27.421Z" },
- { url = "https://files.pythonhosted.org/packages/11/19/466c1d03323a4a0ed7d4036a59b18d6b6f67cb5032e444205927e226b18d/wandb-0.25.1-py3-none-win32.whl", hash = "sha256:8f2d04f16b88d65bfba9d79fb945f6c64e2686215469a841936e0972be8ec6a5", size = 24967338, upload-time = "2026-03-10T23:51:31.833Z" },
- { url = "https://files.pythonhosted.org/packages/89/22/680d34c1587f3a979c701b66d71aa7c42b4ef2fdf0774f67034e618e834e/wandb-0.25.1-py3-none-win_amd64.whl", hash = "sha256:62db5166de14456156d7a85953a58733a631228e6d4248a753605f75f75fb845", size = 24967343, upload-time = "2026-03-10T23:51:36.026Z" },
- { url = "https://files.pythonhosted.org/packages/c4/e8/76836b75d401ff5912aaf513176e64557ceaec4c4946bfd38a698ff84d48/wandb-0.25.1-py3-none-win_arm64.whl", hash = "sha256:cc7c34b70cf4b7be4d395541e82e325fd9d2be978d62c9ec01f1a7141523b6bb", size = 22080774, upload-time = "2026-03-10T23:51:40.196Z" },
+ { url = "https://files.pythonhosted.org/packages/c1/7d/0c131db3ec9deaabbd32263d90863cbfbe07659527e11c35a5c738cecdc5/wandb-0.25.0-py3-none-macosx_12_0_arm64.whl", hash = "sha256:5eecb3c7b5e60d1acfa4b056bfbaa0b79a482566a9db58c9f99724b3862bc8e5", size = 23287536, upload-time = "2026-02-13T00:17:20.265Z" },
+ { url = "https://files.pythonhosted.org/packages/c3/95/31bb7f76a966ec87495e5a72ac7570685be162494c41757ac871768dbc4f/wandb-0.25.0-py3-none-macosx_12_0_x86_64.whl", hash = "sha256:daeedaadb183dc466e634fba90ab2bab1d4e93000912be0dee95065a0624a3fd", size = 25196062, upload-time = "2026-02-13T00:17:23.356Z" },
+ { url = "https://files.pythonhosted.org/packages/d9/a1/258cdedbf30cebc692198a774cf0ef945b7ed98ee64bdaf62621281c95d8/wandb-0.25.0-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:5e0127dbcef13eea48f4b84268da7004d34d3120ebc7b2fa9cefb72b49dbb825", size = 22799744, upload-time = "2026-02-13T00:17:26.437Z" },
+ { url = "https://files.pythonhosted.org/packages/de/91/ec9465d014cfd199c5b2083d271d31b3c2aedeae66f3d8a0712f7f54bdf3/wandb-0.25.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:6c4c38077836f9b7569a35b0e1dcf1f0c43616fcd936d182f475edbfea063665", size = 25262839, upload-time = "2026-02-13T00:17:28.8Z" },
+ { url = "https://files.pythonhosted.org/packages/c7/95/cb2d1c7143f534544147fb53fe87944508b8cb9a058bc5b6f8a94adbee15/wandb-0.25.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:6edd8948d305cb73745bf564b807bd73da2ccbd47c548196b8a362f7df40aed8", size = 22853714, upload-time = "2026-02-13T00:17:31.68Z" },
+ { url = "https://files.pythonhosted.org/packages/d7/94/68163f70c1669edcf130822aaaea782d8198b5df74443eca0085ec596774/wandb-0.25.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:ada6f08629bb014ad6e0a19d5dec478cdaa116431baa3f0a4bf4ab8d9893611f", size = 25358037, upload-time = "2026-02-13T00:17:34.676Z" },
+ { url = "https://files.pythonhosted.org/packages/cc/fb/9578eed2c01b2fc6c8b693da110aa9c73a33d7bb556480f5cfc42e48c94e/wandb-0.25.0-py3-none-win32.whl", hash = "sha256:020b42ca4d76e347709d65f59b30d4623a115edc28f462af1c92681cb17eae7c", size = 24604118, upload-time = "2026-02-13T00:17:37.641Z" },
+ { url = "https://files.pythonhosted.org/packages/25/97/460f6cb738aaa39b4eb2e6b4c630b2ae4321cdd70a79d5955ea75a878981/wandb-0.25.0-py3-none-win_amd64.whl", hash = "sha256:78307ac0b328f2dc334c8607bec772851215584b62c439eb320c4af4fb077a00", size = 24604122, upload-time = "2026-02-13T00:17:39.991Z" },
+ { url = "https://files.pythonhosted.org/packages/27/6c/5847b4dda1dfd52630dac08711d4348c69ed657f0698fc2d949c7f7a6622/wandb-0.25.0-py3-none-win_arm64.whl", hash = "sha256:c6174401fd6fb726295e98d57b4231c100eca96bd17de51bfc64038a57230aaf", size = 21785298, upload-time = "2026-02-13T00:17:42.475Z" },
]
[[package]]
@@ -7357,7 +7101,7 @@ source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "braceexpand" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
- { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
+ { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" },
{ name = "pyyaml" },
]
sdist = { url = "https://files.pythonhosted.org/packages/5a/3a/68800d92e065cf4750ebecf973b13979c0c929b439e1293012938862038d/webdataset-1.0.2.tar.gz", hash = "sha256:7f0498be827cfa46cc5430a58768a24e2c6a410676a61be1838f53d61afdaab4", size = 80090, upload-time = "2025-06-19T23:26:21.945Z" }
@@ -7462,88 +7206,166 @@ wheels = [
[[package]]
name = "wrapt"
-version = "2.1.2"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/2e/64/925f213fdcbb9baeb1530449ac71a4d57fc361c053d06bf78d0c5c7cd80c/wrapt-2.1.2.tar.gz", hash = "sha256:3996a67eecc2c68fd47b4e3c564405a5777367adfd9b8abb58387b63ee83b21e", size = 81678, upload-time = "2026-03-06T02:53:25.134Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/da/d2/387594fb592d027366645f3d7cc9b4d7ca7be93845fbaba6d835a912ef3c/wrapt-2.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4b7a86d99a14f76facb269dc148590c01aaf47584071809a70da30555228158c", size = 60669, upload-time = "2026-03-06T02:52:40.671Z" },
- { url = "https://files.pythonhosted.org/packages/c9/18/3f373935bc5509e7ac444c8026a56762e50c1183e7061797437ca96c12ce/wrapt-2.1.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a819e39017f95bf7aede768f75915635aa8f671f2993c036991b8d3bfe8dbb6f", size = 61603, upload-time = "2026-03-06T02:54:21.032Z" },
- { url = "https://files.pythonhosted.org/packages/c2/7a/32758ca2853b07a887a4574b74e28843919103194bb47001a304e24af62f/wrapt-2.1.2-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5681123e60aed0e64c7d44f72bbf8b4ce45f79d81467e2c4c728629f5baf06eb", size = 113632, upload-time = "2026-03-06T02:53:54.121Z" },
- { url = "https://files.pythonhosted.org/packages/1d/d5/eeaa38f670d462e97d978b3b0d9ce06d5b91e54bebac6fbed867809216e7/wrapt-2.1.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2b8b28e97a44d21836259739ae76284e180b18abbb4dcfdff07a415cf1016c3e", size = 115644, upload-time = "2026-03-06T02:54:53.33Z" },
- { url = "https://files.pythonhosted.org/packages/e3/09/2a41506cb17affb0bdf9d5e2129c8c19e192b388c4c01d05e1b14db23c00/wrapt-2.1.2-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cef91c95a50596fcdc31397eb6955476f82ae8a3f5a8eabdc13611b60ee380ba", size = 112016, upload-time = "2026-03-06T02:54:43.274Z" },
- { url = "https://files.pythonhosted.org/packages/64/15/0e6c3f5e87caadc43db279724ee36979246d5194fa32fed489c73643ba59/wrapt-2.1.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:dad63212b168de8569b1c512f4eac4b57f2c6934b30df32d6ee9534a79f1493f", size = 114823, upload-time = "2026-03-06T02:54:29.392Z" },
- { url = "https://files.pythonhosted.org/packages/56/b2/0ad17c8248f4e57bedf44938c26ec3ee194715f812d2dbbd9d7ff4be6c06/wrapt-2.1.2-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:d307aa6888d5efab2c1cde09843d48c843990be13069003184b67d426d145394", size = 111244, upload-time = "2026-03-06T02:54:02.149Z" },
- { url = "https://files.pythonhosted.org/packages/ff/04/bcdba98c26f2c6522c7c09a726d5d9229120163493620205b2f76bd13c01/wrapt-2.1.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c87cf3f0c85e27b3ac7d9ad95da166bf8739ca215a8b171e8404a2d739897a45", size = 113307, upload-time = "2026-03-06T02:54:12.428Z" },
- { url = "https://files.pythonhosted.org/packages/0e/1b/5e2883c6bc14143924e465a6fc5a92d09eeabe35310842a481fb0581f832/wrapt-2.1.2-cp310-cp310-win32.whl", hash = "sha256:d1c5fea4f9fe3762e2b905fdd67df51e4be7a73b7674957af2d2ade71a5c075d", size = 57986, upload-time = "2026-03-06T02:54:26.823Z" },
- { url = "https://files.pythonhosted.org/packages/42/5a/4efc997bccadd3af5749c250b49412793bc41e13a83a486b2b54a33e240c/wrapt-2.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:d8f7740e1af13dff2684e4d56fe604a7e04d6c94e737a60568d8d4238b9a0c71", size = 60336, upload-time = "2026-03-06T02:54:18Z" },
- { url = "https://files.pythonhosted.org/packages/c1/f5/a2bb833e20181b937e87c242645ed5d5aa9c373006b0467bfe1a35c727d0/wrapt-2.1.2-cp310-cp310-win_arm64.whl", hash = "sha256:1c6cc827c00dc839350155f316f1f8b4b0c370f52b6a19e782e2bda89600c7dc", size = 58757, upload-time = "2026-03-06T02:53:51.545Z" },
- { url = "https://files.pythonhosted.org/packages/c7/81/60c4471fce95afa5922ca09b88a25f03c93343f759aae0f31fb4412a85c7/wrapt-2.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:96159a0ee2b0277d44201c3b5be479a9979cf154e8c82fa5df49586a8e7679bb", size = 60666, upload-time = "2026-03-06T02:52:58.934Z" },
- { url = "https://files.pythonhosted.org/packages/6b/be/80e80e39e7cb90b006a0eaf11c73ac3a62bbfb3068469aec15cc0bc795de/wrapt-2.1.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:98ba61833a77b747901e9012072f038795de7fc77849f1faa965464f3f87ff2d", size = 61601, upload-time = "2026-03-06T02:53:00.487Z" },
- { url = "https://files.pythonhosted.org/packages/b0/be/d7c88cd9293c859fc74b232abdc65a229bb953997995d6912fc85af18323/wrapt-2.1.2-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:767c0dbbe76cae2a60dd2b235ac0c87c9cccf4898aef8062e57bead46b5f6894", size = 114057, upload-time = "2026-03-06T02:52:44.08Z" },
- { url = "https://files.pythonhosted.org/packages/ea/25/36c04602831a4d685d45a93b3abea61eca7fe35dab6c842d6f5d570ef94a/wrapt-2.1.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c691a6bc752c0cc4711cc0c00896fcd0f116abc253609ef64ef930032821842", size = 116099, upload-time = "2026-03-06T02:54:56.74Z" },
- { url = "https://files.pythonhosted.org/packages/5c/4e/98a6eb417ef551dc277bec1253d5246b25003cf36fdf3913b65cb7657a56/wrapt-2.1.2-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f3b7d73012ea75aee5844de58c88f44cf62d0d62711e39da5a82824a7c4626a8", size = 112457, upload-time = "2026-03-06T02:53:52.842Z" },
- { url = "https://files.pythonhosted.org/packages/cb/a6/a6f7186a5297cad8ec53fd7578533b28f795fdf5372368c74bd7e6e9841c/wrapt-2.1.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:577dff354e7acd9d411eaf4bfe76b724c89c89c8fc9b7e127ee28c5f7bcb25b6", size = 115351, upload-time = "2026-03-06T02:53:32.684Z" },
- { url = "https://files.pythonhosted.org/packages/97/6f/06e66189e721dbebd5cf20e138acc4d1150288ce118462f2fcbff92d38db/wrapt-2.1.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:3d7b6fd105f8b24e5bd23ccf41cb1d1099796524bcc6f7fbb8fe576c44befbc9", size = 111748, upload-time = "2026-03-06T02:53:08.455Z" },
- { url = "https://files.pythonhosted.org/packages/ef/43/4808b86f499a51370fbdbdfa6cb91e9b9169e762716456471b619fca7a70/wrapt-2.1.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:866abdbf4612e0b34764922ef8b1c5668867610a718d3053d59e24a5e5fcfc15", size = 113783, upload-time = "2026-03-06T02:53:02.02Z" },
- { url = "https://files.pythonhosted.org/packages/91/2c/a3f28b8fa7ac2cefa01cfcaca3471f9b0460608d012b693998cd61ef43df/wrapt-2.1.2-cp311-cp311-win32.whl", hash = "sha256:5a0a0a3a882393095573344075189eb2d566e0fd205a2b6414e9997b1b800a8b", size = 57977, upload-time = "2026-03-06T02:53:27.844Z" },
- { url = "https://files.pythonhosted.org/packages/3f/c3/2b1c7bd07a27b1db885a2fab469b707bdd35bddf30a113b4917a7e2139d2/wrapt-2.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:64a07a71d2730ba56f11d1a4b91f7817dc79bc134c11516b75d1921a7c6fcda1", size = 60336, upload-time = "2026-03-06T02:54:28.104Z" },
- { url = "https://files.pythonhosted.org/packages/ec/5c/76ece7b401b088daa6503d6264dd80f9a727df3e6042802de9a223084ea2/wrapt-2.1.2-cp311-cp311-win_arm64.whl", hash = "sha256:b89f095fe98bc12107f82a9f7d570dc83a0870291aeb6b1d7a7d35575f55d98a", size = 58756, upload-time = "2026-03-06T02:53:16.319Z" },
- { url = "https://files.pythonhosted.org/packages/4c/b6/1db817582c49c7fcbb7df6809d0f515af29d7c2fbf57eb44c36e98fb1492/wrapt-2.1.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ff2aad9c4cda28a8f0653fc2d487596458c2a3f475e56ba02909e950a9efa6a9", size = 61255, upload-time = "2026-03-06T02:52:45.663Z" },
- { url = "https://files.pythonhosted.org/packages/a2/16/9b02a6b99c09227c93cd4b73acc3678114154ec38da53043c0ddc1fba0dc/wrapt-2.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6433ea84e1cfacf32021d2a4ee909554ade7fd392caa6f7c13f1f4bf7b8e8748", size = 61848, upload-time = "2026-03-06T02:53:48.728Z" },
- { url = "https://files.pythonhosted.org/packages/af/aa/ead46a88f9ec3a432a4832dfedb84092fc35af2d0ba40cd04aea3889f247/wrapt-2.1.2-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c20b757c268d30d6215916a5fa8461048d023865d888e437fab451139cad6c8e", size = 121433, upload-time = "2026-03-06T02:54:40.328Z" },
- { url = "https://files.pythonhosted.org/packages/3a/9f/742c7c7cdf58b59085a1ee4b6c37b013f66ac33673a7ef4aaed5e992bc33/wrapt-2.1.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79847b83eb38e70d93dc392c7c5b587efe65b3e7afcc167aa8abd5d60e8761c8", size = 123013, upload-time = "2026-03-06T02:53:26.58Z" },
- { url = "https://files.pythonhosted.org/packages/e8/44/2c3dd45d53236b7ed7c646fcf212251dc19e48e599debd3926b52310fafb/wrapt-2.1.2-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f8fba1bae256186a83d1875b2b1f4e2d1242e8fac0f58ec0d7e41b26967b965c", size = 117326, upload-time = "2026-03-06T02:53:11.547Z" },
- { url = "https://files.pythonhosted.org/packages/74/e2/b17d66abc26bd96f89dec0ecd0ef03da4a1286e6ff793839ec431b9fae57/wrapt-2.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e3d3b35eedcf5f7d022291ecd7533321c4775f7b9cd0050a31a68499ba45757c", size = 121444, upload-time = "2026-03-06T02:54:09.5Z" },
- { url = "https://files.pythonhosted.org/packages/3c/62/e2977843fdf9f03daf1586a0ff49060b1b2fc7ff85a7ea82b6217c1ae36e/wrapt-2.1.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:6f2c5390460de57fa9582bc8a1b7a6c86e1a41dfad74c5225fc07044c15cc8d1", size = 116237, upload-time = "2026-03-06T02:54:03.884Z" },
- { url = "https://files.pythonhosted.org/packages/88/dd/27fc67914e68d740bce512f11734aec08696e6b17641fef8867c00c949fc/wrapt-2.1.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7dfa9f2cf65d027b951d05c662cc99ee3bd01f6e4691ed39848a7a5fffc902b2", size = 120563, upload-time = "2026-03-06T02:53:20.412Z" },
- { url = "https://files.pythonhosted.org/packages/ec/9f/b750b3692ed2ef4705cb305bd68858e73010492b80e43d2a4faa5573cbe7/wrapt-2.1.2-cp312-cp312-win32.whl", hash = "sha256:eba8155747eb2cae4a0b913d9ebd12a1db4d860fc4c829d7578c7b989bd3f2f0", size = 58198, upload-time = "2026-03-06T02:53:37.732Z" },
- { url = "https://files.pythonhosted.org/packages/8e/b2/feecfe29f28483d888d76a48f03c4c4d8afea944dbee2b0cd3380f9df032/wrapt-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:1c51c738d7d9faa0b3601708e7e2eda9bf779e1b601dce6c77411f2a1b324a63", size = 60441, upload-time = "2026-03-06T02:52:47.138Z" },
- { url = "https://files.pythonhosted.org/packages/44/e1/e328f605d6e208547ea9fd120804fcdec68536ac748987a68c47c606eea8/wrapt-2.1.2-cp312-cp312-win_arm64.whl", hash = "sha256:c8e46ae8e4032792eb2f677dbd0d557170a8e5524d22acc55199f43efedd39bf", size = 58836, upload-time = "2026-03-06T02:53:22.053Z" },
- { url = "https://files.pythonhosted.org/packages/4c/7a/d936840735c828b38d26a854e85d5338894cda544cb7a85a9d5b8b9c4df7/wrapt-2.1.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:787fd6f4d67befa6fe2abdffcbd3de2d82dfc6fb8a6d850407c53332709d030b", size = 61259, upload-time = "2026-03-06T02:53:41.922Z" },
- { url = "https://files.pythonhosted.org/packages/5e/88/9a9b9a90ac8ca11c2fdb6a286cb3a1fc7dd774c00ed70929a6434f6bc634/wrapt-2.1.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4bdf26e03e6d0da3f0e9422fd36bcebf7bc0eeb55fdf9c727a09abc6b9fe472e", size = 61851, upload-time = "2026-03-06T02:52:48.672Z" },
- { url = "https://files.pythonhosted.org/packages/03/a9/5b7d6a16fd6533fed2756900fc8fc923f678179aea62ada6d65c92718c00/wrapt-2.1.2-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bbac24d879aa22998e87f6b3f481a5216311e7d53c7db87f189a7a0266dafffb", size = 121446, upload-time = "2026-03-06T02:54:14.013Z" },
- { url = "https://files.pythonhosted.org/packages/45/bb/34c443690c847835cfe9f892be78c533d4f32366ad2888972c094a897e39/wrapt-2.1.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:16997dfb9d67addc2e3f41b62a104341e80cac52f91110dece393923c0ebd5ca", size = 123056, upload-time = "2026-03-06T02:54:10.829Z" },
- { url = "https://files.pythonhosted.org/packages/93/b9/ff205f391cb708f67f41ea148545f2b53ff543a7ac293b30d178af4d2271/wrapt-2.1.2-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:162e4e2ba7542da9027821cb6e7c5e068d64f9a10b5f15512ea28e954893a267", size = 117359, upload-time = "2026-03-06T02:53:03.623Z" },
- { url = "https://files.pythonhosted.org/packages/1f/3d/1ea04d7747825119c3c9a5e0874a40b33594ada92e5649347c457d982805/wrapt-2.1.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f29c827a8d9936ac320746747a016c4bc66ef639f5cd0d32df24f5eacbf9c69f", size = 121479, upload-time = "2026-03-06T02:53:45.844Z" },
- { url = "https://files.pythonhosted.org/packages/78/cc/ee3a011920c7a023b25e8df26f306b2484a531ab84ca5c96260a73de76c0/wrapt-2.1.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:a9dd9813825f7ecb018c17fd147a01845eb330254dff86d3b5816f20f4d6aaf8", size = 116271, upload-time = "2026-03-06T02:54:46.356Z" },
- { url = "https://files.pythonhosted.org/packages/98/fd/e5ff7ded41b76d802cf1191288473e850d24ba2e39a6ec540f21ae3b57cb/wrapt-2.1.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6f8dbdd3719e534860d6a78526aafc220e0241f981367018c2875178cf83a413", size = 120573, upload-time = "2026-03-06T02:52:50.163Z" },
- { url = "https://files.pythonhosted.org/packages/47/c5/242cae3b5b080cd09bacef0591691ba1879739050cc7c801ff35c8886b66/wrapt-2.1.2-cp313-cp313-win32.whl", hash = "sha256:5c35b5d82b16a3bc6e0a04349b606a0582bc29f573786aebe98e0c159bc48db6", size = 58205, upload-time = "2026-03-06T02:53:47.494Z" },
- { url = "https://files.pythonhosted.org/packages/12/69/c358c61e7a50f290958809b3c61ebe8b3838ea3e070d7aac9814f95a0528/wrapt-2.1.2-cp313-cp313-win_amd64.whl", hash = "sha256:f8bc1c264d8d1cf5b3560a87bbdd31131573eb25f9f9447bb6252b8d4c44a3a1", size = 60452, upload-time = "2026-03-06T02:53:30.038Z" },
- { url = "https://files.pythonhosted.org/packages/8e/66/c8a6fcfe321295fd8c0ab1bd685b5a01462a9b3aa2f597254462fc2bc975/wrapt-2.1.2-cp313-cp313-win_arm64.whl", hash = "sha256:3beb22f674550d5634642c645aba4c72a2c66fb185ae1aebe1e955fae5a13baf", size = 58842, upload-time = "2026-03-06T02:52:52.114Z" },
- { url = "https://files.pythonhosted.org/packages/da/55/9c7052c349106e0b3f17ae8db4b23a691a963c334de7f9dbd60f8f74a831/wrapt-2.1.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0fc04bc8664a8bc4c8e00b37b5355cffca2535209fba1abb09ae2b7c76ddf82b", size = 63075, upload-time = "2026-03-06T02:53:19.108Z" },
- { url = "https://files.pythonhosted.org/packages/09/a8/ce7b4006f7218248dd71b7b2b732d0710845a0e49213b18faef64811ffef/wrapt-2.1.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a9b9d50c9af998875a1482a038eb05755dfd6fe303a313f6a940bb53a83c3f18", size = 63719, upload-time = "2026-03-06T02:54:33.452Z" },
- { url = "https://files.pythonhosted.org/packages/e4/e5/2ca472e80b9e2b7a17f106bb8f9df1db11e62101652ce210f66935c6af67/wrapt-2.1.2-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2d3ff4f0024dd224290c0eabf0240f1bfc1f26363431505fb1b0283d3b08f11d", size = 152643, upload-time = "2026-03-06T02:52:42.721Z" },
- { url = "https://files.pythonhosted.org/packages/36/42/30f0f2cefca9d9cbf6835f544d825064570203c3e70aa873d8ae12e23791/wrapt-2.1.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3278c471f4468ad544a691b31bb856374fbdefb7fee1a152153e64019379f015", size = 158805, upload-time = "2026-03-06T02:54:25.441Z" },
- { url = "https://files.pythonhosted.org/packages/bb/67/d08672f801f604889dcf58f1a0b424fe3808860ede9e03affc1876b295af/wrapt-2.1.2-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a8914c754d3134a3032601c6984db1c576e6abaf3fc68094bb8ab1379d75ff92", size = 145990, upload-time = "2026-03-06T02:53:57.456Z" },
- { url = "https://files.pythonhosted.org/packages/68/a7/fd371b02e73babec1de6ade596e8cd9691051058cfdadbfd62a5898f3295/wrapt-2.1.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:ff95d4264e55839be37bafe1536db2ab2de19da6b65f9244f01f332b5286cfbf", size = 155670, upload-time = "2026-03-06T02:54:55.309Z" },
- { url = "https://files.pythonhosted.org/packages/86/2d/9fe0095dfdb621009f40117dcebf41d7396c2c22dca6eac779f4c007b86c/wrapt-2.1.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:76405518ca4e1b76fbb1b9f686cff93aebae03920cc55ceeec48ff9f719c5f67", size = 144357, upload-time = "2026-03-06T02:54:24.092Z" },
- { url = "https://files.pythonhosted.org/packages/0e/b6/ec7b4a254abbe4cde9fa15c5d2cca4518f6b07d0f1b77d4ee9655e30280e/wrapt-2.1.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c0be8b5a74c5824e9359b53e7e58bef71a729bacc82e16587db1c4ebc91f7c5a", size = 150269, upload-time = "2026-03-06T02:53:31.268Z" },
- { url = "https://files.pythonhosted.org/packages/6e/6b/2fabe8ebf148f4ee3c782aae86a795cc68ffe7d432ef550f234025ce0cfa/wrapt-2.1.2-cp313-cp313t-win32.whl", hash = "sha256:f01277d9a5fc1862f26f7626da9cf443bebc0abd2f303f41c5e995b15887dabd", size = 59894, upload-time = "2026-03-06T02:54:15.391Z" },
- { url = "https://files.pythonhosted.org/packages/ca/fb/9ba66fc2dedc936de5f8073c0217b5d4484e966d87723415cc8262c5d9c2/wrapt-2.1.2-cp313-cp313t-win_amd64.whl", hash = "sha256:84ce8f1c2104d2f6daa912b1b5b039f331febfeee74f8042ad4e04992bd95c8f", size = 63197, upload-time = "2026-03-06T02:54:41.943Z" },
- { url = "https://files.pythonhosted.org/packages/c0/1c/012d7423c95d0e337117723eb8ecf73c622ce15a97847e84cf3f8f26cd7e/wrapt-2.1.2-cp313-cp313t-win_arm64.whl", hash = "sha256:a93cd767e37faeddbe07d8fc4212d5cba660af59bdb0f6372c93faaa13e6e679", size = 60363, upload-time = "2026-03-06T02:54:48.093Z" },
- { url = "https://files.pythonhosted.org/packages/39/25/e7ea0b417db02bb796182a5316398a75792cd9a22528783d868755e1f669/wrapt-2.1.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:1370e516598854e5b4366e09ce81e08bfe94d42b0fd569b88ec46cc56d9164a9", size = 61418, upload-time = "2026-03-06T02:53:55.706Z" },
- { url = "https://files.pythonhosted.org/packages/ec/0f/fa539e2f6a770249907757eaeb9a5ff4deb41c026f8466c1c6d799088a9b/wrapt-2.1.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:6de1a3851c27e0bd6a04ca993ea6f80fc53e6c742ee1601f486c08e9f9b900a9", size = 61914, upload-time = "2026-03-06T02:52:53.37Z" },
- { url = "https://files.pythonhosted.org/packages/53/37/02af1867f5b1441aaeda9c82deed061b7cd1372572ddcd717f6df90b5e93/wrapt-2.1.2-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:de9f1a2bbc5ac7f6012ec24525bdd444765a2ff64b5985ac6e0692144838542e", size = 120417, upload-time = "2026-03-06T02:54:30.74Z" },
- { url = "https://files.pythonhosted.org/packages/c3/b7/0138a6238c8ba7476c77cf786a807f871672b37f37a422970342308276e7/wrapt-2.1.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:970d57ed83fa040d8b20c52fe74a6ae7e3775ae8cff5efd6a81e06b19078484c", size = 122797, upload-time = "2026-03-06T02:54:51.539Z" },
- { url = "https://files.pythonhosted.org/packages/e1/ad/819ae558036d6a15b7ed290d5b14e209ca795dd4da9c58e50c067d5927b0/wrapt-2.1.2-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3969c56e4563c375861c8df14fa55146e81ac11c8db49ea6fb7f2ba58bc1ff9a", size = 117350, upload-time = "2026-03-06T02:54:37.651Z" },
- { url = "https://files.pythonhosted.org/packages/8b/2d/afc18dc57a4600a6e594f77a9ae09db54f55ba455440a54886694a84c71b/wrapt-2.1.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:57d7c0c980abdc5f1d98b11a2aa3bb159790add80258c717fa49a99921456d90", size = 121223, upload-time = "2026-03-06T02:54:35.221Z" },
- { url = "https://files.pythonhosted.org/packages/b9/5b/5ec189b22205697bc56eb3b62aed87a1e0423e9c8285d0781c7a83170d15/wrapt-2.1.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:776867878e83130c7a04237010463372e877c1c994d449ca6aaafeab6aab2586", size = 116287, upload-time = "2026-03-06T02:54:19.654Z" },
- { url = "https://files.pythonhosted.org/packages/f7/2d/f84939a7c9b5e6cdd8a8d0f6a26cabf36a0f7e468b967720e8b0cd2bdf69/wrapt-2.1.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:fab036efe5464ec3291411fabb80a7a39e2dd80bae9bcbeeca5087fdfa891e19", size = 119593, upload-time = "2026-03-06T02:54:16.697Z" },
- { url = "https://files.pythonhosted.org/packages/0b/fe/ccd22a1263159c4ac811ab9374c061bcb4a702773f6e06e38de5f81a1bdc/wrapt-2.1.2-cp314-cp314-win32.whl", hash = "sha256:e6ed62c82ddf58d001096ae84ce7f833db97ae2263bff31c9b336ba8cfe3f508", size = 58631, upload-time = "2026-03-06T02:53:06.498Z" },
- { url = "https://files.pythonhosted.org/packages/65/0a/6bd83be7bff2e7efaac7b4ac9748da9d75a34634bbbbc8ad077d527146df/wrapt-2.1.2-cp314-cp314-win_amd64.whl", hash = "sha256:467e7c76315390331c67073073d00662015bb730c566820c9ca9b54e4d67fd04", size = 60875, upload-time = "2026-03-06T02:53:50.252Z" },
- { url = "https://files.pythonhosted.org/packages/6c/c0/0b3056397fe02ff80e5a5d72d627c11eb885d1ca78e71b1a5c1e8c7d45de/wrapt-2.1.2-cp314-cp314-win_arm64.whl", hash = "sha256:da1f00a557c66225d53b095a97eace0fc5349e3bfda28fa34ffae238978ee575", size = 59164, upload-time = "2026-03-06T02:53:59.128Z" },
- { url = "https://files.pythonhosted.org/packages/71/ed/5d89c798741993b2371396eb9d4634f009ff1ad8a6c78d366fe2883ea7a6/wrapt-2.1.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:62503ffbc2d3a69891cf29beeaccdb4d5e0a126e2b6a851688d4777e01428dbb", size = 63163, upload-time = "2026-03-06T02:52:54.873Z" },
- { url = "https://files.pythonhosted.org/packages/c6/8c/05d277d182bf36b0a13d6bd393ed1dec3468a25b59d01fba2dd70fe4d6ae/wrapt-2.1.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c7e6cd120ef837d5b6f860a6ea3745f8763805c418bb2f12eeb1fa6e25f22d22", size = 63723, upload-time = "2026-03-06T02:52:56.374Z" },
- { url = "https://files.pythonhosted.org/packages/f4/27/6c51ec1eff4413c57e72d6106bb8dec6f0c7cdba6503d78f0fa98767bcc9/wrapt-2.1.2-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3769a77df8e756d65fbc050333f423c01ae012b4f6731aaf70cf2bef61b34596", size = 152652, upload-time = "2026-03-06T02:53:23.79Z" },
- { url = "https://files.pythonhosted.org/packages/db/4c/d7dd662d6963fc7335bfe29d512b02b71cdfa23eeca7ab3ac74a67505deb/wrapt-2.1.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a76d61a2e851996150ba0f80582dd92a870643fa481f3b3846f229de88caf044", size = 158807, upload-time = "2026-03-06T02:53:35.742Z" },
- { url = "https://files.pythonhosted.org/packages/b4/4d/1e5eea1a78d539d346765727422976676615814029522c76b87a95f6bcdd/wrapt-2.1.2-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6f97edc9842cf215312b75fe737ee7c8adda75a89979f8e11558dfff6343cc4b", size = 146061, upload-time = "2026-03-06T02:52:57.574Z" },
- { url = "https://files.pythonhosted.org/packages/89/bc/62cabea7695cd12a288023251eeefdcb8465056ddaab6227cb78a2de005b/wrapt-2.1.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:4006c351de6d5007aa33a551f600404ba44228a89e833d2fadc5caa5de8edfbf", size = 155667, upload-time = "2026-03-06T02:53:39.422Z" },
- { url = "https://files.pythonhosted.org/packages/e9/99/6f2888cd68588f24df3a76572c69c2de28287acb9e1972bf0c83ce97dbc1/wrapt-2.1.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a9372fc3639a878c8e7d87e1556fa209091b0a66e912c611e3f833e2c4202be2", size = 144392, upload-time = "2026-03-06T02:54:22.41Z" },
- { url = "https://files.pythonhosted.org/packages/40/51/1dfc783a6c57971614c48e361a82ca3b6da9055879952587bc99fe1a7171/wrapt-2.1.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3144b027ff30cbd2fca07c0a87e67011adb717eb5f5bd8496325c17e454257a3", size = 150296, upload-time = "2026-03-06T02:54:07.848Z" },
- { url = "https://files.pythonhosted.org/packages/6c/38/cbb8b933a0201076c1f64fc42883b0023002bdc14a4964219154e6ff3350/wrapt-2.1.2-cp314-cp314t-win32.whl", hash = "sha256:3b8d15e52e195813efe5db8cec156eebe339aaf84222f4f4f051a6c01f237ed7", size = 60539, upload-time = "2026-03-06T02:54:00.594Z" },
- { url = "https://files.pythonhosted.org/packages/82/dd/e5176e4b241c9f528402cebb238a36785a628179d7d8b71091154b3e4c9e/wrapt-2.1.2-cp314-cp314t-win_amd64.whl", hash = "sha256:08ffa54146a7559f5b8df4b289b46d963a8e74ed16ba3687f99896101a3990c5", size = 63969, upload-time = "2026-03-06T02:54:39Z" },
- { url = "https://files.pythonhosted.org/packages/5c/99/79f17046cf67e4a95b9987ea129632ba8bcec0bc81f3fb3d19bdb0bd60cd/wrapt-2.1.2-cp314-cp314t-win_arm64.whl", hash = "sha256:72aaa9d0d8e4ed0e2e98019cea47a21f823c9dd4b43c7b77bba6679ffcca6a00", size = 60554, upload-time = "2026-03-06T02:53:14.132Z" },
- { url = "https://files.pythonhosted.org/packages/1a/c7/8528ac2dfa2c1e6708f647df7ae144ead13f0a31146f43c7264b4942bf12/wrapt-2.1.2-py3-none-any.whl", hash = "sha256:b8fd6fa2b2c4e7621808f8c62e8317f4aae56e59721ad933bac5239d913cf0e8", size = 43993, upload-time = "2026-03-06T02:53:12.905Z" },
+version = "1.17.3"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and sys_platform == 'linux'",
+ "python_full_version >= '3.14' and sys_platform == 'win32'",
+ "python_full_version >= '3.14' and sys_platform == 'emscripten'",
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.13.*' and sys_platform == 'win32'",
+ "python_full_version == '3.12.*' and sys_platform == 'win32'",
+ "python_full_version == '3.13.*' and sys_platform == 'emscripten'",
+ "python_full_version == '3.12.*' and sys_platform == 'emscripten'",
+ "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'linux'",
+ "python_full_version == '3.11.*' and sys_platform == 'win32'",
+ "python_full_version == '3.11.*' and sys_platform == 'emscripten'",
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
+ "python_full_version < '3.11' and sys_platform == 'linux'",
+ "python_full_version < '3.11' and sys_platform != 'linux'",
+]
+sdist = { url = "https://files.pythonhosted.org/packages/95/8f/aeb76c5b46e273670962298c23e7ddde79916cb74db802131d49a85e4b7d/wrapt-1.17.3.tar.gz", hash = "sha256:f66eb08feaa410fe4eebd17f2a2c8e2e46d3476e9f8c783daa8e09e0faa666d0", size = 55547, upload-time = "2025-08-12T05:53:21.714Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/3f/23/bb82321b86411eb51e5a5db3fb8f8032fd30bd7c2d74bfe936136b2fa1d6/wrapt-1.17.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:88bbae4d40d5a46142e70d58bf664a89b6b4befaea7b2ecc14e03cedb8e06c04", size = 53482, upload-time = "2025-08-12T05:51:44.467Z" },
+ { url = "https://files.pythonhosted.org/packages/45/69/f3c47642b79485a30a59c63f6d739ed779fb4cc8323205d047d741d55220/wrapt-1.17.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e6b13af258d6a9ad602d57d889f83b9d5543acd471eee12eb51f5b01f8eb1bc2", size = 38676, upload-time = "2025-08-12T05:51:32.636Z" },
+ { url = "https://files.pythonhosted.org/packages/d1/71/e7e7f5670c1eafd9e990438e69d8fb46fa91a50785332e06b560c869454f/wrapt-1.17.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd341868a4b6714a5962c1af0bd44f7c404ef78720c7de4892901e540417111c", size = 38957, upload-time = "2025-08-12T05:51:54.655Z" },
+ { url = "https://files.pythonhosted.org/packages/de/17/9f8f86755c191d6779d7ddead1a53c7a8aa18bccb7cea8e7e72dfa6a8a09/wrapt-1.17.3-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f9b2601381be482f70e5d1051a5965c25fb3625455a2bf520b5a077b22afb775", size = 81975, upload-time = "2025-08-12T05:52:30.109Z" },
+ { url = "https://files.pythonhosted.org/packages/f2/15/dd576273491f9f43dd09fce517f6c2ce6eb4fe21681726068db0d0467096/wrapt-1.17.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:343e44b2a8e60e06a7e0d29c1671a0d9951f59174f3709962b5143f60a2a98bd", size = 83149, upload-time = "2025-08-12T05:52:09.316Z" },
+ { url = "https://files.pythonhosted.org/packages/0c/c4/5eb4ce0d4814521fee7aa806264bf7a114e748ad05110441cd5b8a5c744b/wrapt-1.17.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:33486899acd2d7d3066156b03465b949da3fd41a5da6e394ec49d271baefcf05", size = 82209, upload-time = "2025-08-12T05:52:10.331Z" },
+ { url = "https://files.pythonhosted.org/packages/31/4b/819e9e0eb5c8dc86f60dfc42aa4e2c0d6c3db8732bce93cc752e604bb5f5/wrapt-1.17.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e6f40a8aa5a92f150bdb3e1c44b7e98fb7113955b2e5394122fa5532fec4b418", size = 81551, upload-time = "2025-08-12T05:52:31.137Z" },
+ { url = "https://files.pythonhosted.org/packages/f8/83/ed6baf89ba3a56694700139698cf703aac9f0f9eb03dab92f57551bd5385/wrapt-1.17.3-cp310-cp310-win32.whl", hash = "sha256:a36692b8491d30a8c75f1dfee65bef119d6f39ea84ee04d9f9311f83c5ad9390", size = 36464, upload-time = "2025-08-12T05:53:01.204Z" },
+ { url = "https://files.pythonhosted.org/packages/2f/90/ee61d36862340ad7e9d15a02529df6b948676b9a5829fd5e16640156627d/wrapt-1.17.3-cp310-cp310-win_amd64.whl", hash = "sha256:afd964fd43b10c12213574db492cb8f73b2f0826c8df07a68288f8f19af2ebe6", size = 38748, upload-time = "2025-08-12T05:53:00.209Z" },
+ { url = "https://files.pythonhosted.org/packages/bd/c3/cefe0bd330d389c9983ced15d326f45373f4073c9f4a8c2f99b50bfea329/wrapt-1.17.3-cp310-cp310-win_arm64.whl", hash = "sha256:af338aa93554be859173c39c85243970dc6a289fa907402289eeae7543e1ae18", size = 36810, upload-time = "2025-08-12T05:52:51.906Z" },
+ { url = "https://files.pythonhosted.org/packages/52/db/00e2a219213856074a213503fdac0511203dceefff26e1daa15250cc01a0/wrapt-1.17.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:273a736c4645e63ac582c60a56b0acb529ef07f78e08dc6bfadf6a46b19c0da7", size = 53482, upload-time = "2025-08-12T05:51:45.79Z" },
+ { url = "https://files.pythonhosted.org/packages/5e/30/ca3c4a5eba478408572096fe9ce36e6e915994dd26a4e9e98b4f729c06d9/wrapt-1.17.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5531d911795e3f935a9c23eb1c8c03c211661a5060aab167065896bbf62a5f85", size = 38674, upload-time = "2025-08-12T05:51:34.629Z" },
+ { url = "https://files.pythonhosted.org/packages/31/25/3e8cc2c46b5329c5957cec959cb76a10718e1a513309c31399a4dad07eb3/wrapt-1.17.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0610b46293c59a3adbae3dee552b648b984176f8562ee0dba099a56cfbe4df1f", size = 38959, upload-time = "2025-08-12T05:51:56.074Z" },
+ { url = "https://files.pythonhosted.org/packages/5d/8f/a32a99fc03e4b37e31b57cb9cefc65050ea08147a8ce12f288616b05ef54/wrapt-1.17.3-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b32888aad8b6e68f83a8fdccbf3165f5469702a7544472bdf41f582970ed3311", size = 82376, upload-time = "2025-08-12T05:52:32.134Z" },
+ { url = "https://files.pythonhosted.org/packages/31/57/4930cb8d9d70d59c27ee1332a318c20291749b4fba31f113c2f8ac49a72e/wrapt-1.17.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8cccf4f81371f257440c88faed6b74f1053eef90807b77e31ca057b2db74edb1", size = 83604, upload-time = "2025-08-12T05:52:11.663Z" },
+ { url = "https://files.pythonhosted.org/packages/a8/f3/1afd48de81d63dd66e01b263a6fbb86e1b5053b419b9b33d13e1f6d0f7d0/wrapt-1.17.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8a210b158a34164de8bb68b0e7780041a903d7b00c87e906fb69928bf7890d5", size = 82782, upload-time = "2025-08-12T05:52:12.626Z" },
+ { url = "https://files.pythonhosted.org/packages/1e/d7/4ad5327612173b144998232f98a85bb24b60c352afb73bc48e3e0d2bdc4e/wrapt-1.17.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:79573c24a46ce11aab457b472efd8d125e5a51da2d1d24387666cd85f54c05b2", size = 82076, upload-time = "2025-08-12T05:52:33.168Z" },
+ { url = "https://files.pythonhosted.org/packages/bb/59/e0adfc831674a65694f18ea6dc821f9fcb9ec82c2ce7e3d73a88ba2e8718/wrapt-1.17.3-cp311-cp311-win32.whl", hash = "sha256:c31eebe420a9a5d2887b13000b043ff6ca27c452a9a22fa71f35f118e8d4bf89", size = 36457, upload-time = "2025-08-12T05:53:03.936Z" },
+ { url = "https://files.pythonhosted.org/packages/83/88/16b7231ba49861b6f75fc309b11012ede4d6b0a9c90969d9e0db8d991aeb/wrapt-1.17.3-cp311-cp311-win_amd64.whl", hash = "sha256:0b1831115c97f0663cb77aa27d381237e73ad4f721391a9bfb2fe8bc25fa6e77", size = 38745, upload-time = "2025-08-12T05:53:02.885Z" },
+ { url = "https://files.pythonhosted.org/packages/9a/1e/c4d4f3398ec073012c51d1c8d87f715f56765444e1a4b11e5180577b7e6e/wrapt-1.17.3-cp311-cp311-win_arm64.whl", hash = "sha256:5a7b3c1ee8265eb4c8f1b7d29943f195c00673f5ab60c192eba2d4a7eae5f46a", size = 36806, upload-time = "2025-08-12T05:52:53.368Z" },
+ { url = "https://files.pythonhosted.org/packages/9f/41/cad1aba93e752f1f9268c77270da3c469883d56e2798e7df6240dcb2287b/wrapt-1.17.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ab232e7fdb44cdfbf55fc3afa31bcdb0d8980b9b95c38b6405df2acb672af0e0", size = 53998, upload-time = "2025-08-12T05:51:47.138Z" },
+ { url = "https://files.pythonhosted.org/packages/60/f8/096a7cc13097a1869fe44efe68dace40d2a16ecb853141394047f0780b96/wrapt-1.17.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9baa544e6acc91130e926e8c802a17f3b16fbea0fd441b5a60f5cf2cc5c3deba", size = 39020, upload-time = "2025-08-12T05:51:35.906Z" },
+ { url = "https://files.pythonhosted.org/packages/33/df/bdf864b8997aab4febb96a9ae5c124f700a5abd9b5e13d2a3214ec4be705/wrapt-1.17.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6b538e31eca1a7ea4605e44f81a48aa24c4632a277431a6ed3f328835901f4fd", size = 39098, upload-time = "2025-08-12T05:51:57.474Z" },
+ { url = "https://files.pythonhosted.org/packages/9f/81/5d931d78d0eb732b95dc3ddaeeb71c8bb572fb01356e9133916cd729ecdd/wrapt-1.17.3-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:042ec3bb8f319c147b1301f2393bc19dba6e176b7da446853406d041c36c7828", size = 88036, upload-time = "2025-08-12T05:52:34.784Z" },
+ { url = "https://files.pythonhosted.org/packages/ca/38/2e1785df03b3d72d34fc6252d91d9d12dc27a5c89caef3335a1bbb8908ca/wrapt-1.17.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3af60380ba0b7b5aeb329bc4e402acd25bd877e98b3727b0135cb5c2efdaefe9", size = 88156, upload-time = "2025-08-12T05:52:13.599Z" },
+ { url = "https://files.pythonhosted.org/packages/b3/8b/48cdb60fe0603e34e05cffda0b2a4adab81fd43718e11111a4b0100fd7c1/wrapt-1.17.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0b02e424deef65c9f7326d8c19220a2c9040c51dc165cddb732f16198c168396", size = 87102, upload-time = "2025-08-12T05:52:14.56Z" },
+ { url = "https://files.pythonhosted.org/packages/3c/51/d81abca783b58f40a154f1b2c56db1d2d9e0d04fa2d4224e357529f57a57/wrapt-1.17.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:74afa28374a3c3a11b3b5e5fca0ae03bef8450d6aa3ab3a1e2c30e3a75d023dc", size = 87732, upload-time = "2025-08-12T05:52:36.165Z" },
+ { url = "https://files.pythonhosted.org/packages/9e/b1/43b286ca1392a006d5336412d41663eeef1ad57485f3e52c767376ba7e5a/wrapt-1.17.3-cp312-cp312-win32.whl", hash = "sha256:4da9f45279fff3543c371d5ababc57a0384f70be244de7759c85a7f989cb4ebe", size = 36705, upload-time = "2025-08-12T05:53:07.123Z" },
+ { url = "https://files.pythonhosted.org/packages/28/de/49493f962bd3c586ab4b88066e967aa2e0703d6ef2c43aa28cb83bf7b507/wrapt-1.17.3-cp312-cp312-win_amd64.whl", hash = "sha256:e71d5c6ebac14875668a1e90baf2ea0ef5b7ac7918355850c0908ae82bcb297c", size = 38877, upload-time = "2025-08-12T05:53:05.436Z" },
+ { url = "https://files.pythonhosted.org/packages/f1/48/0f7102fe9cb1e8a5a77f80d4f0956d62d97034bbe88d33e94699f99d181d/wrapt-1.17.3-cp312-cp312-win_arm64.whl", hash = "sha256:604d076c55e2fdd4c1c03d06dc1a31b95130010517b5019db15365ec4a405fc6", size = 36885, upload-time = "2025-08-12T05:52:54.367Z" },
+ { url = "https://files.pythonhosted.org/packages/fc/f6/759ece88472157acb55fc195e5b116e06730f1b651b5b314c66291729193/wrapt-1.17.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a47681378a0439215912ef542c45a783484d4dd82bac412b71e59cf9c0e1cea0", size = 54003, upload-time = "2025-08-12T05:51:48.627Z" },
+ { url = "https://files.pythonhosted.org/packages/4f/a9/49940b9dc6d47027dc850c116d79b4155f15c08547d04db0f07121499347/wrapt-1.17.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:54a30837587c6ee3cd1a4d1c2ec5d24e77984d44e2f34547e2323ddb4e22eb77", size = 39025, upload-time = "2025-08-12T05:51:37.156Z" },
+ { url = "https://files.pythonhosted.org/packages/45/35/6a08de0f2c96dcdd7fe464d7420ddb9a7655a6561150e5fc4da9356aeaab/wrapt-1.17.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:16ecf15d6af39246fe33e507105d67e4b81d8f8d2c6598ff7e3ca1b8a37213f7", size = 39108, upload-time = "2025-08-12T05:51:58.425Z" },
+ { url = "https://files.pythonhosted.org/packages/0c/37/6faf15cfa41bf1f3dba80cd3f5ccc6622dfccb660ab26ed79f0178c7497f/wrapt-1.17.3-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6fd1ad24dc235e4ab88cda009e19bf347aabb975e44fd5c2fb22a3f6e4141277", size = 88072, upload-time = "2025-08-12T05:52:37.53Z" },
+ { url = "https://files.pythonhosted.org/packages/78/f2/efe19ada4a38e4e15b6dff39c3e3f3f73f5decf901f66e6f72fe79623a06/wrapt-1.17.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ed61b7c2d49cee3c027372df5809a59d60cf1b6c2f81ee980a091f3afed6a2d", size = 88214, upload-time = "2025-08-12T05:52:15.886Z" },
+ { url = "https://files.pythonhosted.org/packages/40/90/ca86701e9de1622b16e09689fc24b76f69b06bb0150990f6f4e8b0eeb576/wrapt-1.17.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:423ed5420ad5f5529db9ce89eac09c8a2f97da18eb1c870237e84c5a5c2d60aa", size = 87105, upload-time = "2025-08-12T05:52:17.914Z" },
+ { url = "https://files.pythonhosted.org/packages/fd/e0/d10bd257c9a3e15cbf5523025252cc14d77468e8ed644aafb2d6f54cb95d/wrapt-1.17.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e01375f275f010fcbf7f643b4279896d04e571889b8a5b3f848423d91bf07050", size = 87766, upload-time = "2025-08-12T05:52:39.243Z" },
+ { url = "https://files.pythonhosted.org/packages/e8/cf/7d848740203c7b4b27eb55dbfede11aca974a51c3d894f6cc4b865f42f58/wrapt-1.17.3-cp313-cp313-win32.whl", hash = "sha256:53e5e39ff71b3fc484df8a522c933ea2b7cdd0d5d15ae82e5b23fde87d44cbd8", size = 36711, upload-time = "2025-08-12T05:53:10.074Z" },
+ { url = "https://files.pythonhosted.org/packages/57/54/35a84d0a4d23ea675994104e667ceff49227ce473ba6a59ba2c84f250b74/wrapt-1.17.3-cp313-cp313-win_amd64.whl", hash = "sha256:1f0b2f40cf341ee8cc1a97d51ff50dddb9fcc73241b9143ec74b30fc4f44f6cb", size = 38885, upload-time = "2025-08-12T05:53:08.695Z" },
+ { url = "https://files.pythonhosted.org/packages/01/77/66e54407c59d7b02a3c4e0af3783168fff8e5d61def52cda8728439d86bc/wrapt-1.17.3-cp313-cp313-win_arm64.whl", hash = "sha256:7425ac3c54430f5fc5e7b6f41d41e704db073309acfc09305816bc6a0b26bb16", size = 36896, upload-time = "2025-08-12T05:52:55.34Z" },
+ { url = "https://files.pythonhosted.org/packages/02/a2/cd864b2a14f20d14f4c496fab97802001560f9f41554eef6df201cd7f76c/wrapt-1.17.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:cf30f6e3c077c8e6a9a7809c94551203c8843e74ba0c960f4a98cd80d4665d39", size = 54132, upload-time = "2025-08-12T05:51:49.864Z" },
+ { url = "https://files.pythonhosted.org/packages/d5/46/d011725b0c89e853dc44cceb738a307cde5d240d023d6d40a82d1b4e1182/wrapt-1.17.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e228514a06843cae89621384cfe3a80418f3c04aadf8a3b14e46a7be704e4235", size = 39091, upload-time = "2025-08-12T05:51:38.935Z" },
+ { url = "https://files.pythonhosted.org/packages/2e/9e/3ad852d77c35aae7ddebdbc3b6d35ec8013af7d7dddad0ad911f3d891dae/wrapt-1.17.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:5ea5eb3c0c071862997d6f3e02af1d055f381b1d25b286b9d6644b79db77657c", size = 39172, upload-time = "2025-08-12T05:51:59.365Z" },
+ { url = "https://files.pythonhosted.org/packages/c3/f7/c983d2762bcce2326c317c26a6a1e7016f7eb039c27cdf5c4e30f4160f31/wrapt-1.17.3-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:281262213373b6d5e4bb4353bc36d1ba4084e6d6b5d242863721ef2bf2c2930b", size = 87163, upload-time = "2025-08-12T05:52:40.965Z" },
+ { url = "https://files.pythonhosted.org/packages/e4/0f/f673f75d489c7f22d17fe0193e84b41540d962f75fce579cf6873167c29b/wrapt-1.17.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dc4a8d2b25efb6681ecacad42fca8859f88092d8732b170de6a5dddd80a1c8fa", size = 87963, upload-time = "2025-08-12T05:52:20.326Z" },
+ { url = "https://files.pythonhosted.org/packages/df/61/515ad6caca68995da2fac7a6af97faab8f78ebe3bf4f761e1b77efbc47b5/wrapt-1.17.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:373342dd05b1d07d752cecbec0c41817231f29f3a89aa8b8843f7b95992ed0c7", size = 86945, upload-time = "2025-08-12T05:52:21.581Z" },
+ { url = "https://files.pythonhosted.org/packages/d3/bd/4e70162ce398462a467bc09e768bee112f1412e563620adc353de9055d33/wrapt-1.17.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d40770d7c0fd5cbed9d84b2c3f2e156431a12c9a37dc6284060fb4bec0b7ffd4", size = 86857, upload-time = "2025-08-12T05:52:43.043Z" },
+ { url = "https://files.pythonhosted.org/packages/2b/b8/da8560695e9284810b8d3df8a19396a6e40e7518059584a1a394a2b35e0a/wrapt-1.17.3-cp314-cp314-win32.whl", hash = "sha256:fbd3c8319de8e1dc79d346929cd71d523622da527cca14e0c1d257e31c2b8b10", size = 37178, upload-time = "2025-08-12T05:53:12.605Z" },
+ { url = "https://files.pythonhosted.org/packages/db/c8/b71eeb192c440d67a5a0449aaee2310a1a1e8eca41676046f99ed2487e9f/wrapt-1.17.3-cp314-cp314-win_amd64.whl", hash = "sha256:e1a4120ae5705f673727d3253de3ed0e016f7cd78dc463db1b31e2463e1f3cf6", size = 39310, upload-time = "2025-08-12T05:53:11.106Z" },
+ { url = "https://files.pythonhosted.org/packages/45/20/2cda20fd4865fa40f86f6c46ed37a2a8356a7a2fde0773269311f2af56c7/wrapt-1.17.3-cp314-cp314-win_arm64.whl", hash = "sha256:507553480670cab08a800b9463bdb881b2edeed77dc677b0a5915e6106e91a58", size = 37266, upload-time = "2025-08-12T05:52:56.531Z" },
+ { url = "https://files.pythonhosted.org/packages/77/ed/dd5cf21aec36c80443c6f900449260b80e2a65cf963668eaef3b9accce36/wrapt-1.17.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:ed7c635ae45cfbc1a7371f708727bf74690daedc49b4dba310590ca0bd28aa8a", size = 56544, upload-time = "2025-08-12T05:51:51.109Z" },
+ { url = "https://files.pythonhosted.org/packages/8d/96/450c651cc753877ad100c7949ab4d2e2ecc4d97157e00fa8f45df682456a/wrapt-1.17.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:249f88ed15503f6492a71f01442abddd73856a0032ae860de6d75ca62eed8067", size = 40283, upload-time = "2025-08-12T05:51:39.912Z" },
+ { url = "https://files.pythonhosted.org/packages/d1/86/2fcad95994d9b572db57632acb6f900695a648c3e063f2cd344b3f5c5a37/wrapt-1.17.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5a03a38adec8066d5a37bea22f2ba6bbf39fcdefbe2d91419ab864c3fb515454", size = 40366, upload-time = "2025-08-12T05:52:00.693Z" },
+ { url = "https://files.pythonhosted.org/packages/64/0e/f4472f2fdde2d4617975144311f8800ef73677a159be7fe61fa50997d6c0/wrapt-1.17.3-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5d4478d72eb61c36e5b446e375bbc49ed002430d17cdec3cecb36993398e1a9e", size = 108571, upload-time = "2025-08-12T05:52:44.521Z" },
+ { url = "https://files.pythonhosted.org/packages/cc/01/9b85a99996b0a97c8a17484684f206cbb6ba73c1ce6890ac668bcf3838fb/wrapt-1.17.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:223db574bb38637e8230eb14b185565023ab624474df94d2af18f1cdb625216f", size = 113094, upload-time = "2025-08-12T05:52:22.618Z" },
+ { url = "https://files.pythonhosted.org/packages/25/02/78926c1efddcc7b3aa0bc3d6b33a822f7d898059f7cd9ace8c8318e559ef/wrapt-1.17.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e405adefb53a435f01efa7ccdec012c016b5a1d3f35459990afc39b6be4d5056", size = 110659, upload-time = "2025-08-12T05:52:24.057Z" },
+ { url = "https://files.pythonhosted.org/packages/dc/ee/c414501ad518ac3e6fe184753632fe5e5ecacdcf0effc23f31c1e4f7bfcf/wrapt-1.17.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:88547535b787a6c9ce4086917b6e1d291aa8ed914fdd3a838b3539dc95c12804", size = 106946, upload-time = "2025-08-12T05:52:45.976Z" },
+ { url = "https://files.pythonhosted.org/packages/be/44/a1bd64b723d13bb151d6cc91b986146a1952385e0392a78567e12149c7b4/wrapt-1.17.3-cp314-cp314t-win32.whl", hash = "sha256:41b1d2bc74c2cac6f9074df52b2efbef2b30bdfe5f40cb78f8ca22963bc62977", size = 38717, upload-time = "2025-08-12T05:53:15.214Z" },
+ { url = "https://files.pythonhosted.org/packages/79/d9/7cfd5a312760ac4dd8bf0184a6ee9e43c33e47f3dadc303032ce012b8fa3/wrapt-1.17.3-cp314-cp314t-win_amd64.whl", hash = "sha256:73d496de46cd2cdbdbcce4ae4bcdb4afb6a11234a1df9c085249d55166b95116", size = 41334, upload-time = "2025-08-12T05:53:14.178Z" },
+ { url = "https://files.pythonhosted.org/packages/46/78/10ad9781128ed2f99dbc474f43283b13fea8ba58723e98844367531c18e9/wrapt-1.17.3-cp314-cp314t-win_arm64.whl", hash = "sha256:f38e60678850c42461d4202739f9bf1e3a737c7ad283638251e79cc49effb6b6", size = 38471, upload-time = "2025-08-12T05:52:57.784Z" },
+ { url = "https://files.pythonhosted.org/packages/1f/f6/a933bd70f98e9cf3e08167fc5cd7aaaca49147e48411c0bd5ae701bb2194/wrapt-1.17.3-py3-none-any.whl", hash = "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22", size = 23591, upload-time = "2025-08-12T05:53:20.674Z" },
+]
+
+[[package]]
+name = "wrapt"
+version = "2.1.1"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+ "python_full_version >= '3.14' and sys_platform == 'linux'",
+]
+sdist = { url = "https://files.pythonhosted.org/packages/f7/37/ae31f40bec90de2f88d9597d0b5281e23ffe85b893a47ca5d9c05c63a4f6/wrapt-2.1.1.tar.gz", hash = "sha256:5fdcb09bf6db023d88f312bd0767594b414655d58090fc1c46b3414415f67fac", size = 81329, upload-time = "2026-02-03T02:12:13.786Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ca/21/293b657a27accfbbbb6007ebd78af0efa2083dac83e8f523272ea09b4638/wrapt-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7e927375e43fd5a985b27a8992327c22541b6dede1362fc79df337d26e23604f", size = 60554, upload-time = "2026-02-03T02:11:17.362Z" },
+ { url = "https://files.pythonhosted.org/packages/25/e9/96dd77728b54a899d4ce2798d7b1296989ce687ed3c0cb917d6b3154bf5d/wrapt-2.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e1c99544b6a7d40ca22195563b6d8bc3986ee8bb82f272f31f0670fe9440c869", size = 61496, upload-time = "2026-02-03T02:12:54.732Z" },
+ { url = "https://files.pythonhosted.org/packages/44/79/4c755b45df6ef30c0dd628ecfaa0c808854be147ca438429da70a162833c/wrapt-2.1.1-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b2be3fa5f4efaf16ee7c77d0556abca35f5a18ad4ac06f0ef3904c3399010ce9", size = 113528, upload-time = "2026-02-03T02:12:26.405Z" },
+ { url = "https://files.pythonhosted.org/packages/9f/63/23ce28f7b841217d9a6337a340fbb8d4a7fbd67a89d47f377c8550fa34aa/wrapt-2.1.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:67c90c1ae6489a6cb1a82058902caa8006706f7b4e8ff766f943e9d2c8e608d0", size = 115536, upload-time = "2026-02-03T02:11:54.397Z" },
+ { url = "https://files.pythonhosted.org/packages/23/7b/5ca8d3b12768670d16c8329e29960eedd56212770365a02a8de8bf73dc01/wrapt-2.1.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:05c0db35ccffd7480143e62df1e829d101c7b86944ae3be7e4869a7efa621f53", size = 114716, upload-time = "2026-02-03T02:12:20.771Z" },
+ { url = "https://files.pythonhosted.org/packages/c7/3a/9789ccb14a096d30bb847bf3ee137bf682cc9750c2ce155f4c5ae1962abf/wrapt-2.1.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0c2ec9f616755b2e1e0bf4d0961f59bb5c2e7a77407e7e2c38ef4f7d2fdde12c", size = 113200, upload-time = "2026-02-03T02:12:07.688Z" },
+ { url = "https://files.pythonhosted.org/packages/cf/e5/4ec3526ce6ce920b267c8d35d2c2f0874d3fad2744c8b7259353f1132baa/wrapt-2.1.1-cp310-cp310-win32.whl", hash = "sha256:203ba6b3f89e410e27dbd30ff7dccaf54dcf30fda0b22aa1b82d560c7f9fe9a1", size = 57876, upload-time = "2026-02-03T02:11:42.61Z" },
+ { url = "https://files.pythonhosted.org/packages/d1/4e/661c7c76ecd85375b2bc03488941a3a1078642af481db24949e2b9de01f4/wrapt-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:6f9426d9cfc2f8732922fc96198052e55c09bb9db3ddaa4323a18e055807410e", size = 60224, upload-time = "2026-02-03T02:11:19.096Z" },
+ { url = "https://files.pythonhosted.org/packages/5f/b7/53c7252d371efada4cb119e72e774fa2c6b3011fc33e3e552cdf48fb9488/wrapt-2.1.1-cp310-cp310-win_arm64.whl", hash = "sha256:69c26f51b67076b40714cff81bdd5826c0b10c077fb6b0678393a6a2f952a5fc", size = 58645, upload-time = "2026-02-03T02:12:10.396Z" },
+ { url = "https://files.pythonhosted.org/packages/b8/a8/9254e4da74b30a105935197015b18b31b7a298bf046e67d8952ef74967bd/wrapt-2.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6c366434a7fb914c7a5de508ed735ef9c133367114e1a7cb91dfb5cd806a1549", size = 60554, upload-time = "2026-02-03T02:11:13.038Z" },
+ { url = "https://files.pythonhosted.org/packages/9e/a1/378579880cc7af226354054a2c255f69615b379d8adad482bfe2f22a0dc2/wrapt-2.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5d6a2068bd2e1e19e5a317c8c0b288267eec4e7347c36bc68a6e378a39f19ee7", size = 61491, upload-time = "2026-02-03T02:12:56.077Z" },
+ { url = "https://files.pythonhosted.org/packages/dc/72/957b51c56acca35701665878ad31626182199fc4afecfe67dea072210f95/wrapt-2.1.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:891ab4713419217b2aed7dd106c9200f64e6a82226775a0d2ebd6bef2ebd1747", size = 113949, upload-time = "2026-02-03T02:11:04.516Z" },
+ { url = "https://files.pythonhosted.org/packages/cd/74/36bbebb4a3d2ae9c3e6929639721f8606cd0710a82a777c371aa69e36504/wrapt-2.1.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c8ef36a0df38d2dc9d907f6617f89e113c5892e0a35f58f45f75901af0ce7d81", size = 115989, upload-time = "2026-02-03T02:12:19.398Z" },
+ { url = "https://files.pythonhosted.org/packages/ae/0d/f1177245a083c7be284bc90bddfe5aece32cdd5b858049cb69ce001a0e8d/wrapt-2.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:76e9af3ebd86f19973143d4d592cbf3e970cf3f66ddee30b16278c26ae34b8ab", size = 115242, upload-time = "2026-02-03T02:11:08.111Z" },
+ { url = "https://files.pythonhosted.org/packages/62/3e/3b7cf5da27e59df61b1eae2d07dd03ff5d6f75b5408d694873cca7a8e33c/wrapt-2.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ff562067485ebdeaef2fa3fe9b1876bc4e7b73762e0a01406ad81e2076edcebf", size = 113676, upload-time = "2026-02-03T02:12:41.026Z" },
+ { url = "https://files.pythonhosted.org/packages/f7/65/8248d3912c705f2c66f81cb97c77436f37abcbedb16d633b5ab0d795d8cd/wrapt-2.1.1-cp311-cp311-win32.whl", hash = "sha256:9e60a30aa0909435ec4ea2a3c53e8e1b50ac9f640c0e9fe3f21fd248a22f06c5", size = 57863, upload-time = "2026-02-03T02:12:18.112Z" },
+ { url = "https://files.pythonhosted.org/packages/6b/31/d29310ab335f71f00c50466153b3dc985aaf4a9fc03263e543e136859541/wrapt-2.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:7d79954f51fcf84e5ec4878ab4aea32610d70145c5bbc84b3370eabfb1e096c2", size = 60224, upload-time = "2026-02-03T02:12:29.289Z" },
+ { url = "https://files.pythonhosted.org/packages/0c/90/a6ec319affa6e2894962a0cb9d73c67f88af1a726d15314bfb5c88b8a08d/wrapt-2.1.1-cp311-cp311-win_arm64.whl", hash = "sha256:d3ffc6b0efe79e08fd947605fd598515aebefe45e50432dc3b5cd437df8b1ada", size = 58643, upload-time = "2026-02-03T02:12:43.022Z" },
+ { url = "https://files.pythonhosted.org/packages/df/cb/4d5255d19bbd12be7f8ee2c1fb4269dddec9cef777ef17174d357468efaa/wrapt-2.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ab8e3793b239db021a18782a5823fcdea63b9fe75d0e340957f5828ef55fcc02", size = 61143, upload-time = "2026-02-03T02:11:46.313Z" },
+ { url = "https://files.pythonhosted.org/packages/6f/07/7ed02daa35542023464e3c8b7cb937fa61f6c61c0361ecf8f5fecf8ad8da/wrapt-2.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7c0300007836373d1c2df105b40777986accb738053a92fe09b615a7a4547e9f", size = 61740, upload-time = "2026-02-03T02:12:51.966Z" },
+ { url = "https://files.pythonhosted.org/packages/c4/60/a237a4e4a36f6d966061ccc9b017627d448161b19e0a3ab80a7c7c97f859/wrapt-2.1.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2b27c070fd1132ab23957bcd4ee3ba707a91e653a9268dc1afbd39b77b2799f7", size = 121327, upload-time = "2026-02-03T02:11:06.796Z" },
+ { url = "https://files.pythonhosted.org/packages/ae/fe/9139058a3daa8818fc67e6460a2340e8bbcf3aef8b15d0301338bbe181ca/wrapt-2.1.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b0e36d845e8b6f50949b6b65fc6cd279f47a1944582ed4ec8258cd136d89a64", size = 122903, upload-time = "2026-02-03T02:12:48.657Z" },
+ { url = "https://files.pythonhosted.org/packages/91/10/b8479202b4164649675846a531763531f0a6608339558b5a0a718fc49a8d/wrapt-2.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4aeea04a9889370fcfb1ef828c4cc583f36a875061505cd6cd9ba24d8b43cc36", size = 121333, upload-time = "2026-02-03T02:11:32.148Z" },
+ { url = "https://files.pythonhosted.org/packages/5f/75/75fc793b791d79444aca2c03ccde64e8b99eda321b003f267d570b7b0985/wrapt-2.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d88b46bb0dce9f74b6817bc1758ff2125e1ca9e1377d62ea35b6896142ab6825", size = 120458, upload-time = "2026-02-03T02:11:16.039Z" },
+ { url = "https://files.pythonhosted.org/packages/d7/8f/c3f30d511082ca6d947c405f9d8f6c8eaf83cfde527c439ec2c9a30eb5ea/wrapt-2.1.1-cp312-cp312-win32.whl", hash = "sha256:63decff76ca685b5c557082dfbea865f3f5f6d45766a89bff8dc61d336348833", size = 58086, upload-time = "2026-02-03T02:12:35.041Z" },
+ { url = "https://files.pythonhosted.org/packages/0a/c8/37625b643eea2849f10c3b90f69c7462faa4134448d4443234adaf122ae5/wrapt-2.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:b828235d26c1e35aca4107039802ae4b1411be0fe0367dd5b7e4d90e562fcbcd", size = 60328, upload-time = "2026-02-03T02:12:45.808Z" },
+ { url = "https://files.pythonhosted.org/packages/ce/79/56242f07572d5682ba8065a9d4d9c2218313f576e3c3471873c2a5355ffd/wrapt-2.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:75128507413a9f1bcbe2db88fd18fbdbf80f264b82fa33a6996cdeaf01c52352", size = 58722, upload-time = "2026-02-03T02:12:27.949Z" },
+ { url = "https://files.pythonhosted.org/packages/f7/ca/3cf290212855b19af9fcc41b725b5620b32f470d6aad970c2593500817eb/wrapt-2.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ce9646e17fa7c3e2e7a87e696c7de66512c2b4f789a8db95c613588985a2e139", size = 61150, upload-time = "2026-02-03T02:12:50.575Z" },
+ { url = "https://files.pythonhosted.org/packages/9d/33/5b8f89a82a9859ce82da4870c799ad11ce15648b6e1c820fec3e23f4a19f/wrapt-2.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:428cfc801925454395aa468ba7ddb3ed63dc0d881df7b81626cdd433b4e2b11b", size = 61743, upload-time = "2026-02-03T02:11:55.733Z" },
+ { url = "https://files.pythonhosted.org/packages/1e/2f/60c51304fbdf47ce992d9eefa61fbd2c0e64feee60aaa439baf42ea6f40b/wrapt-2.1.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5797f65e4d58065a49088c3b32af5410751cd485e83ba89e5a45e2aa8905af98", size = 121341, upload-time = "2026-02-03T02:11:20.461Z" },
+ { url = "https://files.pythonhosted.org/packages/ad/03/ce5256e66dd94e521ad5e753c78185c01b6eddbed3147be541f4d38c0cb7/wrapt-2.1.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5a2db44a71202c5ae4bb5f27c6d3afbc5b23053f2e7e78aa29704541b5dad789", size = 122947, upload-time = "2026-02-03T02:11:33.596Z" },
+ { url = "https://files.pythonhosted.org/packages/eb/ae/50ca8854b81b946a11a36fcd6ead32336e6db2c14b6e4a8b092b80741178/wrapt-2.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8d5350c3590af09c1703dd60ec78a7370c0186e11eaafb9dda025a30eee6492d", size = 121370, upload-time = "2026-02-03T02:11:09.886Z" },
+ { url = "https://files.pythonhosted.org/packages/fb/d9/d6a7c654e0043319b4cc137a4caaf7aa16b46b51ee8df98d1060254705b7/wrapt-2.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2d9b076411bed964e752c01b49fd224cc385f3a96f520c797d38412d70d08359", size = 120465, upload-time = "2026-02-03T02:11:37.592Z" },
+ { url = "https://files.pythonhosted.org/packages/55/90/65be41e40845d951f714b5a77e84f377a3787b1e8eee6555a680da6d0db5/wrapt-2.1.1-cp313-cp313-win32.whl", hash = "sha256:0bb7207130ce6486727baa85373503bf3334cc28016f6928a0fa7e19d7ecdc06", size = 58090, upload-time = "2026-02-03T02:12:53.342Z" },
+ { url = "https://files.pythonhosted.org/packages/5f/66/6a09e0294c4fc8c26028a03a15191721c9271672467cc33e6617ee0d91d2/wrapt-2.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:cbfee35c711046b15147b0ae7db9b976f01c9520e6636d992cd9e69e5e2b03b1", size = 60341, upload-time = "2026-02-03T02:12:36.384Z" },
+ { url = "https://files.pythonhosted.org/packages/7a/f0/20ceb8b701e9a71555c87a5ddecbed76ec16742cf1e4b87bbaf26735f998/wrapt-2.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:7d2756061022aebbf57ba14af9c16e8044e055c22d38de7bf40d92b565ecd2b0", size = 58731, upload-time = "2026-02-03T02:12:01.328Z" },
+ { url = "https://files.pythonhosted.org/packages/80/b4/fe95beb8946700b3db371f6ce25115217e7075ca063663b8cca2888ba55c/wrapt-2.1.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4814a3e58bc6971e46baa910ecee69699110a2bf06c201e24277c65115a20c20", size = 62969, upload-time = "2026-02-03T02:11:51.245Z" },
+ { url = "https://files.pythonhosted.org/packages/b8/89/477b0bdc784e3299edf69c279697372b8bd4c31d9c6966eae405442899df/wrapt-2.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:106c5123232ab9b9f4903692e1fa0bdc231510098f04c13c3081f8ad71c3d612", size = 63606, upload-time = "2026-02-03T02:12:02.64Z" },
+ { url = "https://files.pythonhosted.org/packages/ed/55/9d0c1269ab76de87715b3b905df54dd25d55bbffd0b98696893eb613469f/wrapt-2.1.1-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1a40b83ff2535e6e56f190aff123821eea89a24c589f7af33413b9c19eb2c738", size = 152536, upload-time = "2026-02-03T02:11:24.492Z" },
+ { url = "https://files.pythonhosted.org/packages/44/18/2004766030462f79ad86efaa62000b5e39b1ff001dcce86650e1625f40ae/wrapt-2.1.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:789cea26e740d71cf1882e3a42bb29052bc4ada15770c90072cb47bf73fb3dbf", size = 158697, upload-time = "2026-02-03T02:12:32.214Z" },
+ { url = "https://files.pythonhosted.org/packages/e1/bb/0a880fa0f35e94ee843df4ee4dd52a699c9263f36881311cfb412c09c3e5/wrapt-2.1.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:ba49c14222d5e5c0ee394495a8655e991dc06cbca5398153aefa5ac08cd6ccd7", size = 155563, upload-time = "2026-02-03T02:11:49.737Z" },
+ { url = "https://files.pythonhosted.org/packages/42/ff/cd1b7c4846c8678fac359a6eb975dc7ab5bd606030adb22acc8b4a9f53f1/wrapt-2.1.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ac8cda531fe55be838a17c62c806824472bb962b3afa47ecbd59b27b78496f4e", size = 150161, upload-time = "2026-02-03T02:12:33.613Z" },
+ { url = "https://files.pythonhosted.org/packages/38/ec/67c90a7082f452964b4621e4890e9a490f1add23cdeb7483cc1706743291/wrapt-2.1.1-cp313-cp313t-win32.whl", hash = "sha256:b8af75fe20d381dd5bcc9db2e86a86d7fcfbf615383a7147b85da97c1182225b", size = 59783, upload-time = "2026-02-03T02:11:39.863Z" },
+ { url = "https://files.pythonhosted.org/packages/ec/08/466afe4855847d8febdfa2c57c87e991fc5820afbdef01a273683dfd15a0/wrapt-2.1.1-cp313-cp313t-win_amd64.whl", hash = "sha256:45c5631c9b6c792b78be2d7352129f776dd72c605be2c3a4e9be346be8376d83", size = 63082, upload-time = "2026-02-03T02:12:09.075Z" },
+ { url = "https://files.pythonhosted.org/packages/9a/62/60b629463c28b15b1eeadb3a0691e17568622b12aa5bfa7ebe9b514bfbeb/wrapt-2.1.1-cp313-cp313t-win_arm64.whl", hash = "sha256:da815b9263947ac98d088b6414ac83507809a1d385e4632d9489867228d6d81c", size = 60251, upload-time = "2026-02-03T02:11:21.794Z" },
+ { url = "https://files.pythonhosted.org/packages/95/a0/1c2396e272f91efe6b16a6a8bce7ad53856c8f9ae4f34ceaa711d63ec9e1/wrapt-2.1.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:9aa1765054245bb01a37f615503290d4e207e3fd59226e78341afb587e9c1236", size = 61311, upload-time = "2026-02-03T02:12:44.41Z" },
+ { url = "https://files.pythonhosted.org/packages/b0/9a/d2faba7e61072a7507b5722db63562fdb22f5a24e237d460d18755627f15/wrapt-2.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:feff14b63a6d86c1eee33a57f77573649f2550935981625be7ff3cb7342efe05", size = 61805, upload-time = "2026-02-03T02:11:59.905Z" },
+ { url = "https://files.pythonhosted.org/packages/db/56/073989deb4b5d7d6e7ea424476a4ae4bda02140f2dbeaafb14ba4864dd60/wrapt-2.1.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:81fc5f22d5fcfdbabde96bb3f5379b9f4476d05c6d524d7259dc5dfb501d3281", size = 120308, upload-time = "2026-02-03T02:12:04.46Z" },
+ { url = "https://files.pythonhosted.org/packages/d1/b6/84f37261295e38167a29eb82affaf1dc15948dc416925fe2091beee8e4ac/wrapt-2.1.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:951b228ecf66def855d22e006ab9a1fc12535111ae7db2ec576c728f8ddb39e8", size = 122688, upload-time = "2026-02-03T02:11:23.148Z" },
+ { url = "https://files.pythonhosted.org/packages/ea/80/32db2eec6671f80c65b7ff175be61bc73d7f5223f6910b0c921bbc4bd11c/wrapt-2.1.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0ddf582a95641b9a8c8bd643e83f34ecbbfe1b68bc3850093605e469ab680ae3", size = 121115, upload-time = "2026-02-03T02:12:39.068Z" },
+ { url = "https://files.pythonhosted.org/packages/49/ef/dcd00383df0cd696614127902153bf067971a5aabcd3c9dcb2d8ef354b2a/wrapt-2.1.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:fc5c500966bf48913f795f1984704e6d452ba2414207b15e1f8c339a059d5b16", size = 119484, upload-time = "2026-02-03T02:11:48.419Z" },
+ { url = "https://files.pythonhosted.org/packages/76/29/0630280cdd2bd8f86f35cb6854abee1c9d6d1a28a0c6b6417cd15d378325/wrapt-2.1.1-cp314-cp314-win32.whl", hash = "sha256:4aa4baadb1f94b71151b8e44a0c044f6af37396c3b8bcd474b78b49e2130a23b", size = 58514, upload-time = "2026-02-03T02:11:58.616Z" },
+ { url = "https://files.pythonhosted.org/packages/db/19/5bed84f9089ed2065f6aeda5dfc4f043743f642bc871454b261c3d7d322b/wrapt-2.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:860e9d3fd81816a9f4e40812f28be4439ab01f260603c749d14be3c0a1170d19", size = 60763, upload-time = "2026-02-03T02:12:24.553Z" },
+ { url = "https://files.pythonhosted.org/packages/e4/cb/b967f2f9669e4249b4fe82e630d2a01bc6b9e362b9b12ed91bbe23ae8df4/wrapt-2.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:3c59e103017a2c1ea0ddf589cbefd63f91081d7ce9d491d69ff2512bb1157e23", size = 59051, upload-time = "2026-02-03T02:11:29.602Z" },
+ { url = "https://files.pythonhosted.org/packages/eb/19/6fed62be29f97eb8a56aff236c3f960a4b4a86e8379dc7046a8005901a97/wrapt-2.1.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:9fa7c7e1bee9278fc4f5dd8275bc8d25493281a8ec6c61959e37cc46acf02007", size = 63059, upload-time = "2026-02-03T02:12:06.368Z" },
+ { url = "https://files.pythonhosted.org/packages/0a/1c/b757fd0adb53d91547ed8fad76ba14a5932d83dde4c994846a2804596378/wrapt-2.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:39c35e12e8215628984248bd9c8897ce0a474be2a773db207eb93414219d8469", size = 63618, upload-time = "2026-02-03T02:12:23.197Z" },
+ { url = "https://files.pythonhosted.org/packages/10/fe/e5ae17b1480957c7988d991b93df9f2425fc51f128cf88144d6a18d0eb12/wrapt-2.1.1-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:94ded4540cac9125eaa8ddf5f651a7ec0da6f5b9f248fe0347b597098f8ec14c", size = 152544, upload-time = "2026-02-03T02:11:43.915Z" },
+ { url = "https://files.pythonhosted.org/packages/3e/cc/99aed210c6b547b8a6e4cb9d1425e4466727158a6aeb833aa7997e9e08dd/wrapt-2.1.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:da0af328373f97ed9bdfea24549ac1b944096a5a71b30e41c9b8b53ab3eec04a", size = 158700, upload-time = "2026-02-03T02:12:30.684Z" },
+ { url = "https://files.pythonhosted.org/packages/81/0e/d442f745f4957944d5f8ad38bc3a96620bfff3562533b87e486e979f3d99/wrapt-2.1.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:4ad839b55f0bf235f8e337ce060572d7a06592592f600f3a3029168e838469d3", size = 155561, upload-time = "2026-02-03T02:11:28.164Z" },
+ { url = "https://files.pythonhosted.org/packages/51/ac/9891816280e0018c48f8dfd61b136af7b0dcb4a088895db2531acde5631b/wrapt-2.1.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0d89c49356e5e2a50fa86b40e0510082abcd0530f926cbd71cf25bee6b9d82d7", size = 150188, upload-time = "2026-02-03T02:11:57.053Z" },
+ { url = "https://files.pythonhosted.org/packages/24/98/e2f273b6d70d41f98d0739aa9a269d0b633684a5fb17b9229709375748d4/wrapt-2.1.1-cp314-cp314t-win32.whl", hash = "sha256:f4c7dd22cf7f36aafe772f3d88656559205c3af1b7900adfccb70edeb0d2abc4", size = 60425, upload-time = "2026-02-03T02:11:35.007Z" },
+ { url = "https://files.pythonhosted.org/packages/1e/06/b500bfc38a4f82d89f34a13069e748c82c5430d365d9e6b75afb3ab74457/wrapt-2.1.1-cp314-cp314t-win_amd64.whl", hash = "sha256:f76bc12c583ab01e73ba0ea585465a41e48d968f6d1311b4daec4f8654e356e3", size = 63855, upload-time = "2026-02-03T02:12:15.47Z" },
+ { url = "https://files.pythonhosted.org/packages/d9/cc/5f6193c32166faee1d2a613f278608e6f3b95b96589d020f0088459c46c9/wrapt-2.1.1-cp314-cp314t-win_arm64.whl", hash = "sha256:7ea74fc0bec172f1ae5f3505b6655c541786a5cabe4bbc0d9723a56ac32eb9b9", size = 60443, upload-time = "2026-02-03T02:11:30.869Z" },
+ { url = "https://files.pythonhosted.org/packages/c4/da/5a086bf4c22a41995312db104ec2ffeee2cf6accca9faaee5315c790377d/wrapt-2.1.1-py3-none-any.whl", hash = "sha256:3b0f4629eb954394a3d7c7a1c8cca25f0b07cefe6aa8545e862e9778152de5b7", size = 43886, upload-time = "2026-02-03T02:11:45.048Z" },
]
[[package]]
@@ -7731,142 +7553,128 @@ wheels = [
[[package]]
name = "yarl"
-version = "1.23.0"
+version = "1.22.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "idna" },
{ name = "multidict" },
{ name = "propcache" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/23/6e/beb1beec874a72f23815c1434518bfc4ed2175065173fb138c3705f658d4/yarl-1.23.0.tar.gz", hash = "sha256:53b1ea6ca88ebd4420379c330aea57e258408dd0df9af0992e5de2078dc9f5d5", size = 194676, upload-time = "2026-03-01T22:07:53.373Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/8b/0d/9cc638702f6fc3c7a3685bcc8cf2a9ed7d6206e932a49f5242658047ef51/yarl-1.23.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cff6d44cb13d39db2663a22b22305d10855efa0fa8015ddeacc40bc59b9d8107", size = 123764, upload-time = "2026-03-01T22:04:09.7Z" },
- { url = "https://files.pythonhosted.org/packages/7a/35/5a553687c5793df5429cd1db45909d4f3af7eee90014888c208d086a44f0/yarl-1.23.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c53f8347cd4200f0d70a48ad059cabaf24f5adc6ba08622a23423bc7efa10d", size = 86282, upload-time = "2026-03-01T22:04:11.892Z" },
- { url = "https://files.pythonhosted.org/packages/68/2e/c5a2234238f8ce37a8312b52801ee74117f576b1539eec8404a480434acc/yarl-1.23.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2a6940a074fb3c48356ed0158a3ca5699c955ee4185b4d7d619be3c327143e05", size = 86053, upload-time = "2026-03-01T22:04:13.292Z" },
- { url = "https://files.pythonhosted.org/packages/74/3f/bbd8ff36fb038622797ffbaf7db314918bb4d76f1cc8a4f9ca7a55fe5195/yarl-1.23.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ed5f69ce7be7902e5c70ea19eb72d20abf7d725ab5d49777d696e32d4fc1811d", size = 99395, upload-time = "2026-03-01T22:04:15.133Z" },
- { url = "https://files.pythonhosted.org/packages/77/04/9516bc4e269d2a3ec9c6779fcdeac51ce5b3a9b0156f06ac7152e5bba864/yarl-1.23.0-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:389871e65468400d6283c0308e791a640b5ab5c83bcee02a2f51295f95e09748", size = 92143, upload-time = "2026-03-01T22:04:16.829Z" },
- { url = "https://files.pythonhosted.org/packages/c7/63/88802d1f6b1cb1fc67d67a58cd0cf8a1790de4ce7946e434240f1d60ab4a/yarl-1.23.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:dda608c88cf709b1d406bdfcd84d8d63cff7c9e577a403c6108ce8ce9dcc8764", size = 107643, upload-time = "2026-03-01T22:04:18.519Z" },
- { url = "https://files.pythonhosted.org/packages/8e/db/4f9b838f4d8bdd6f0f385aed8bbf21c71ed11a0b9983305c302cbd557815/yarl-1.23.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8c4fe09e0780c6c3bf2b7d4af02ee2394439d11a523bbcf095cf4747c2932007", size = 108700, upload-time = "2026-03-01T22:04:20.373Z" },
- { url = "https://files.pythonhosted.org/packages/50/12/95a1d33f04a79c402664070d43b8b9f72dc18914e135b345b611b0b1f8cc/yarl-1.23.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:31c9921eb8bd12633b41ad27686bbb0b1a2a9b8452bfdf221e34f311e9942ed4", size = 102769, upload-time = "2026-03-01T22:04:23.055Z" },
- { url = "https://files.pythonhosted.org/packages/86/65/91a0285f51321369fd1a8308aa19207520c5f0587772cfc2e03fc2467e90/yarl-1.23.0-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:5f10fd85e4b75967468af655228fbfd212bdf66db1c0d135065ce288982eda26", size = 101114, upload-time = "2026-03-01T22:04:25.031Z" },
- { url = "https://files.pythonhosted.org/packages/58/80/c7c8244fc3e5bc483dc71a09560f43b619fab29301a0f0a8f936e42865c7/yarl-1.23.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:dbf507e9ef5688bada447a24d68b4b58dd389ba93b7afc065a2ba892bea54769", size = 98883, upload-time = "2026-03-01T22:04:27.281Z" },
- { url = "https://files.pythonhosted.org/packages/86/e7/71ca9cc9ca79c0b7d491216177d1aed559d632947b8ffb0ee60f7d8b23e3/yarl-1.23.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:85e9beda1f591bc73e77ea1c51965c68e98dafd0fec72cdd745f77d727466716", size = 94172, upload-time = "2026-03-01T22:04:28.554Z" },
- { url = "https://files.pythonhosted.org/packages/6a/3f/6c6c8a0fe29c26fb2db2e8d32195bb84ec1bfb8f1d32e7f73b787fcf349b/yarl-1.23.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:0e1fdaa14ef51366d7757b45bde294e95f6c8c049194e793eedb8387c86d5993", size = 107010, upload-time = "2026-03-01T22:04:30.385Z" },
- { url = "https://files.pythonhosted.org/packages/56/38/12730c05e5ad40a76374d440ed8b0899729a96c250516d91c620a6e38fc2/yarl-1.23.0-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:75e3026ab649bf48f9a10c0134512638725b521340293f202a69b567518d94e0", size = 100285, upload-time = "2026-03-01T22:04:31.752Z" },
- { url = "https://files.pythonhosted.org/packages/34/92/6a7be9239f2347234e027284e7a5f74b1140cc86575e7b469d13fba1ebfe/yarl-1.23.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:80e6d33a3d42a7549b409f199857b4fb54e2103fc44fb87605b6663b7a7ff750", size = 108230, upload-time = "2026-03-01T22:04:33.844Z" },
- { url = "https://files.pythonhosted.org/packages/5e/81/4aebccfa9376bd98b9d8bfad20621a57d3e8cfc5b8631c1fa5f62cdd03f4/yarl-1.23.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5ec2f42d41ccbd5df0270d7df31618a8ee267bfa50997f5d720ddba86c4a83a6", size = 103008, upload-time = "2026-03-01T22:04:35.856Z" },
- { url = "https://files.pythonhosted.org/packages/38/0f/0b4e3edcec794a86b853b0c6396c0a888d72dfce19b2d88c02ac289fb6c1/yarl-1.23.0-cp310-cp310-win32.whl", hash = "sha256:debe9c4f41c32990771be5c22b56f810659f9ddf3d63f67abfdcaa2c6c9c5c1d", size = 83073, upload-time = "2026-03-01T22:04:38.268Z" },
- { url = "https://files.pythonhosted.org/packages/a0/71/ad95c33da18897e4c636528bbc24a1dd23fe16797de8bc4ec667b8db0ba4/yarl-1.23.0-cp310-cp310-win_amd64.whl", hash = "sha256:ab5f043cb8a2d71c981c09c510da013bc79fd661f5c60139f00dd3c3cc4f2ffb", size = 87328, upload-time = "2026-03-01T22:04:39.558Z" },
- { url = "https://files.pythonhosted.org/packages/e2/14/dfa369523c79bccf9c9c746b0a63eb31f65db9418ac01275f7950962e504/yarl-1.23.0-cp310-cp310-win_arm64.whl", hash = "sha256:263cd4f47159c09b8b685890af949195b51d1aa82ba451c5847ca9bc6413c220", size = 82463, upload-time = "2026-03-01T22:04:41.454Z" },
- { url = "https://files.pythonhosted.org/packages/a2/aa/60da938b8f0997ba3a911263c40d82b6f645a67902a490b46f3355e10fae/yarl-1.23.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b35d13d549077713e4414f927cdc388d62e543987c572baee613bf82f11a4b99", size = 123641, upload-time = "2026-03-01T22:04:42.841Z" },
- { url = "https://files.pythonhosted.org/packages/24/84/e237607faf4e099dbb8a4f511cfd5efcb5f75918baad200ff7380635631b/yarl-1.23.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cbb0fef01f0c6b38cb0f39b1f78fc90b807e0e3c86a7ff3ce74ad77ce5c7880c", size = 86248, upload-time = "2026-03-01T22:04:44.757Z" },
- { url = "https://files.pythonhosted.org/packages/b2/0d/71ceabc14c146ba8ee3804ca7b3d42b1664c8440439de5214d366fec7d3a/yarl-1.23.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dc52310451fc7c629e13c4e061cbe2dd01684d91f2f8ee2821b083c58bd72432", size = 85988, upload-time = "2026-03-01T22:04:46.365Z" },
- { url = "https://files.pythonhosted.org/packages/8c/6c/4a90d59c572e46b270ca132aca66954f1175abd691f74c1ef4c6711828e2/yarl-1.23.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b2c6b50c7b0464165472b56b42d4c76a7b864597007d9c085e8b63e185cf4a7a", size = 100566, upload-time = "2026-03-01T22:04:47.639Z" },
- { url = "https://files.pythonhosted.org/packages/49/fb/c438fb5108047e629f6282a371e6e91cf3f97ee087c4fb748a1f32ceef55/yarl-1.23.0-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:aafe5dcfda86c8af00386d7781d4c2181b5011b7be3f2add5e99899ea925df05", size = 92079, upload-time = "2026-03-01T22:04:48.925Z" },
- { url = "https://files.pythonhosted.org/packages/d9/13/d269aa1aed3e4f50a5a103f96327210cc5fa5dd2d50882778f13c7a14606/yarl-1.23.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9ee33b875f0b390564c1fb7bc528abf18c8ee6073b201c6ae8524aca778e2d83", size = 108741, upload-time = "2026-03-01T22:04:50.838Z" },
- { url = "https://files.pythonhosted.org/packages/85/fb/115b16f22c37ea4437d323e472945bea97301c8ec6089868fa560abab590/yarl-1.23.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4c41e021bc6d7affb3364dc1e1e5fa9582b470f283748784bd6ea0558f87f42c", size = 108099, upload-time = "2026-03-01T22:04:52.499Z" },
- { url = "https://files.pythonhosted.org/packages/9a/64/c53487d9f4968045b8afa51aed7ca44f58b2589e772f32745f3744476c82/yarl-1.23.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:99c8a9ed30f4164bc4c14b37a90208836cbf50d4ce2a57c71d0f52c7fb4f7598", size = 102678, upload-time = "2026-03-01T22:04:55.176Z" },
- { url = "https://files.pythonhosted.org/packages/85/59/cd98e556fbb2bf8fab29c1a722f67ad45c5f3447cac798ab85620d1e70af/yarl-1.23.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f2af5c81a1f124609d5f33507082fc3f739959d4719b56877ab1ee7e7b3d602b", size = 100803, upload-time = "2026-03-01T22:04:56.588Z" },
- { url = "https://files.pythonhosted.org/packages/9e/c0/b39770b56d4a9f0bb5f77e2f1763cd2d75cc2f6c0131e3b4c360348fcd65/yarl-1.23.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6b41389c19b07c760c7e427a3462e8ab83c4bb087d127f0e854c706ce1b9215c", size = 100163, upload-time = "2026-03-01T22:04:58.492Z" },
- { url = "https://files.pythonhosted.org/packages/e7/64/6980f99ab00e1f0ff67cb84766c93d595b067eed07439cfccfc8fb28c1a6/yarl-1.23.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:1dc702e42d0684f42d6519c8d581e49c96cefaaab16691f03566d30658ee8788", size = 93859, upload-time = "2026-03-01T22:05:00.268Z" },
- { url = "https://files.pythonhosted.org/packages/38/69/912e6c5e146793e5d4b5fe39ff5b00f4d22463dfd5a162bec565ac757673/yarl-1.23.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:0e40111274f340d32ebcc0a5668d54d2b552a6cca84c9475859d364b380e3222", size = 108202, upload-time = "2026-03-01T22:05:02.273Z" },
- { url = "https://files.pythonhosted.org/packages/59/97/35ca6767524687ad64e5f5c31ad54bc76d585585a9fcb40f649e7e82ffed/yarl-1.23.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:4764a6a7588561a9aef92f65bda2c4fb58fe7c675c0883862e6df97559de0bfb", size = 99866, upload-time = "2026-03-01T22:05:03.597Z" },
- { url = "https://files.pythonhosted.org/packages/d3/1c/1a3387ee6d73589f6f2a220ae06f2984f6c20b40c734989b0a44f5987308/yarl-1.23.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:03214408cfa590df47728b84c679ae4ef00be2428e11630277be0727eba2d7cc", size = 107852, upload-time = "2026-03-01T22:05:04.986Z" },
- { url = "https://files.pythonhosted.org/packages/a4/b8/35c0750fcd5a3f781058bfd954515dd4b1eab45e218cbb85cf11132215f1/yarl-1.23.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:170e26584b060879e29fac213e4228ef063f39128723807a312e5c7fec28eff2", size = 102919, upload-time = "2026-03-01T22:05:06.397Z" },
- { url = "https://files.pythonhosted.org/packages/e5/1c/9a1979aec4a81896d597bcb2177827f2dbee3f5b7cc48b2d0dadb644b41d/yarl-1.23.0-cp311-cp311-win32.whl", hash = "sha256:51430653db848d258336cfa0244427b17d12db63d42603a55f0d4546f50f25b5", size = 82602, upload-time = "2026-03-01T22:05:08.444Z" },
- { url = "https://files.pythonhosted.org/packages/93/22/b85eca6fa2ad9491af48c973e4c8cf6b103a73dbb271fe3346949449fca0/yarl-1.23.0-cp311-cp311-win_amd64.whl", hash = "sha256:bf49a3ae946a87083ef3a34c8f677ae4243f5b824bfc4c69672e72b3d6719d46", size = 87461, upload-time = "2026-03-01T22:05:10.145Z" },
- { url = "https://files.pythonhosted.org/packages/93/95/07e3553fe6f113e6864a20bdc53a78113cda3b9ced8784ee52a52c9f80d8/yarl-1.23.0-cp311-cp311-win_arm64.whl", hash = "sha256:b39cb32a6582750b6cc77bfb3c49c0f8760dc18dc96ec9fb55fbb0f04e08b928", size = 82336, upload-time = "2026-03-01T22:05:11.554Z" },
- { url = "https://files.pythonhosted.org/packages/88/8a/94615bc31022f711add374097ad4144d569e95ff3c38d39215d07ac153a0/yarl-1.23.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1932b6b8bba8d0160a9d1078aae5838a66039e8832d41d2992daa9a3a08f7860", size = 124737, upload-time = "2026-03-01T22:05:12.897Z" },
- { url = "https://files.pythonhosted.org/packages/e3/6f/c6554045d59d64052698add01226bc867b52fe4a12373415d7991fdca95d/yarl-1.23.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:411225bae281f114067578891bc75534cfb3d92a3b4dfef7a6ca78ba354e6069", size = 87029, upload-time = "2026-03-01T22:05:14.376Z" },
- { url = "https://files.pythonhosted.org/packages/19/2a/725ecc166d53438bc88f76822ed4b1e3b10756e790bafd7b523fe97c322d/yarl-1.23.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:13a563739ae600a631c36ce096615fe307f131344588b0bc0daec108cdb47b25", size = 86310, upload-time = "2026-03-01T22:05:15.71Z" },
- { url = "https://files.pythonhosted.org/packages/99/30/58260ed98e6ff7f90ba84442c1ddd758c9170d70327394a6227b310cd60f/yarl-1.23.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9cbf44c5cb4a7633d078788e1b56387e3d3cf2b8139a3be38040b22d6c3221c8", size = 97587, upload-time = "2026-03-01T22:05:17.384Z" },
- { url = "https://files.pythonhosted.org/packages/76/0a/8b08aac08b50682e65759f7f8dde98ae8168f72487e7357a5d684c581ef9/yarl-1.23.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:53ad387048f6f09a8969631e4de3f1bf70c50e93545d64af4f751b2498755072", size = 92528, upload-time = "2026-03-01T22:05:18.804Z" },
- { url = "https://files.pythonhosted.org/packages/52/07/0b7179101fe5f8385ec6c6bb5d0cb9f76bd9fb4a769591ab6fb5cdbfc69a/yarl-1.23.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4a59ba56f340334766f3a4442e0efd0af895fae9e2b204741ef885c446b3a1a8", size = 105339, upload-time = "2026-03-01T22:05:20.235Z" },
- { url = "https://files.pythonhosted.org/packages/d3/8a/36d82869ab5ec829ca8574dfcb92b51286fcfb1e9c7a73659616362dc880/yarl-1.23.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:803a3c3ce4acc62eaf01eaca1208dcf0783025ef27572c3336502b9c232005e7", size = 105061, upload-time = "2026-03-01T22:05:22.268Z" },
- { url = "https://files.pythonhosted.org/packages/66/3e/868e5c3364b6cee19ff3e1a122194fa4ce51def02c61023970442162859e/yarl-1.23.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a3d2bff8f37f8d0f96c7ec554d16945050d54462d6e95414babaa18bfafc7f51", size = 100132, upload-time = "2026-03-01T22:05:23.638Z" },
- { url = "https://files.pythonhosted.org/packages/cf/26/9c89acf82f08a52cb52d6d39454f8d18af15f9d386a23795389d1d423823/yarl-1.23.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c75eb09e8d55bceb4367e83496ff8ef2bc7ea6960efb38e978e8073ea59ecb67", size = 99289, upload-time = "2026-03-01T22:05:25.749Z" },
- { url = "https://files.pythonhosted.org/packages/6f/54/5b0db00d2cb056922356104468019c0a132e89c8d3ab67d8ede9f4483d2a/yarl-1.23.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:877b0738624280e34c55680d6054a307aa94f7d52fa0e3034a9cc6e790871da7", size = 96950, upload-time = "2026-03-01T22:05:27.318Z" },
- { url = "https://files.pythonhosted.org/packages/f6/40/10fa93811fd439341fad7e0718a86aca0de9548023bbb403668d6555acab/yarl-1.23.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b5405bb8f0e783a988172993cfc627e4d9d00432d6bbac65a923041edacf997d", size = 93960, upload-time = "2026-03-01T22:05:28.738Z" },
- { url = "https://files.pythonhosted.org/packages/bc/d2/8ae2e6cd77d0805f4526e30ec43b6f9a3dfc542d401ac4990d178e4bf0cf/yarl-1.23.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1c3a3598a832590c5a3ce56ab5576361b5688c12cb1d39429cf5dba30b510760", size = 104703, upload-time = "2026-03-01T22:05:30.438Z" },
- { url = "https://files.pythonhosted.org/packages/2f/0c/b3ceacf82c3fe21183ce35fa2acf5320af003d52bc1fcf5915077681142e/yarl-1.23.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:8419ebd326430d1cbb7efb5292330a2cf39114e82df5cc3d83c9a0d5ebeaf2f2", size = 98325, upload-time = "2026-03-01T22:05:31.835Z" },
- { url = "https://files.pythonhosted.org/packages/9d/e0/12900edd28bdab91a69bd2554b85ad7b151f64e8b521fe16f9ad2f56477a/yarl-1.23.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:be61f6fff406ca40e3b1d84716fde398fc08bc63dd96d15f3a14230a0973ed86", size = 105067, upload-time = "2026-03-01T22:05:33.358Z" },
- { url = "https://files.pythonhosted.org/packages/15/61/74bb1182cf79c9bbe4eb6b1f14a57a22d7a0be5e9cedf8e2d5c2086474c3/yarl-1.23.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3ceb13c5c858d01321b5d9bb65e4cf37a92169ea470b70fec6f236b2c9dd7e34", size = 100285, upload-time = "2026-03-01T22:05:35.4Z" },
- { url = "https://files.pythonhosted.org/packages/69/7f/cd5ef733f2550de6241bd8bd8c3febc78158b9d75f197d9c7baa113436af/yarl-1.23.0-cp312-cp312-win32.whl", hash = "sha256:fffc45637bcd6538de8b85f51e3df3223e4ad89bccbfca0481c08c7fc8b7ed7d", size = 82359, upload-time = "2026-03-01T22:05:36.811Z" },
- { url = "https://files.pythonhosted.org/packages/f5/be/25216a49daeeb7af2bec0db22d5e7df08ed1d7c9f65d78b14f3b74fd72fc/yarl-1.23.0-cp312-cp312-win_amd64.whl", hash = "sha256:f69f57305656a4852f2a7203efc661d8c042e6cc67f7acd97d8667fb448a426e", size = 87674, upload-time = "2026-03-01T22:05:38.171Z" },
- { url = "https://files.pythonhosted.org/packages/d2/35/aeab955d6c425b227d5b7247eafb24f2653fedc32f95373a001af5dfeb9e/yarl-1.23.0-cp312-cp312-win_arm64.whl", hash = "sha256:6e87a6e8735b44816e7db0b2fbc9686932df473c826b0d9743148432e10bb9b9", size = 81879, upload-time = "2026-03-01T22:05:40.006Z" },
- { url = "https://files.pythonhosted.org/packages/9a/4b/a0a6e5d0ee8a2f3a373ddef8a4097d74ac901ac363eea1440464ccbe0898/yarl-1.23.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:16c6994ac35c3e74fb0ae93323bf8b9c2a9088d55946109489667c510a7d010e", size = 123796, upload-time = "2026-03-01T22:05:41.412Z" },
- { url = "https://files.pythonhosted.org/packages/67/b6/8925d68af039b835ae876db5838e82e76ec87b9782ecc97e192b809c4831/yarl-1.23.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4a42e651629dafb64fd5b0286a3580613702b5809ad3f24934ea87595804f2c5", size = 86547, upload-time = "2026-03-01T22:05:42.841Z" },
- { url = "https://files.pythonhosted.org/packages/ae/50/06d511cc4b8e0360d3c94af051a768e84b755c5eb031b12adaaab6dec6e5/yarl-1.23.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7c6b9461a2a8b47c65eef63bb1c76a4f1c119618ffa99ea79bc5bb1e46c5821b", size = 85854, upload-time = "2026-03-01T22:05:44.85Z" },
- { url = "https://files.pythonhosted.org/packages/c4/f4/4e30b250927ffdab4db70da08b9b8d2194d7c7b400167b8fbeca1e4701ca/yarl-1.23.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2569b67d616eab450d262ca7cb9f9e19d2f718c70a8b88712859359d0ab17035", size = 98351, upload-time = "2026-03-01T22:05:46.836Z" },
- { url = "https://files.pythonhosted.org/packages/86/fc/4118c5671ea948208bdb1492d8b76bdf1453d3e73df051f939f563e7dcc5/yarl-1.23.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e9d9a4d06d3481eab79803beb4d9bd6f6a8e781ec078ac70d7ef2dcc29d1bea5", size = 92711, upload-time = "2026-03-01T22:05:48.316Z" },
- { url = "https://files.pythonhosted.org/packages/56/11/1ed91d42bd9e73c13dc9e7eb0dd92298d75e7ac4dd7f046ad0c472e231cd/yarl-1.23.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f514f6474e04179d3d33175ed3f3e31434d3130d42ec153540d5b157deefd735", size = 106014, upload-time = "2026-03-01T22:05:50.028Z" },
- { url = "https://files.pythonhosted.org/packages/ce/c9/74e44e056a23fbc33aca71779ef450ca648a5bc472bdad7a82339918f818/yarl-1.23.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:fda207c815b253e34f7e1909840fd14299567b1c0eb4908f8c2ce01a41265401", size = 105557, upload-time = "2026-03-01T22:05:51.416Z" },
- { url = "https://files.pythonhosted.org/packages/66/fe/b1e10b08d287f518994f1e2ff9b6d26f0adeecd8dd7d533b01bab29a3eda/yarl-1.23.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34b6cf500e61c90f305094911f9acc9c86da1a05a7a3f5be9f68817043f486e4", size = 101559, upload-time = "2026-03-01T22:05:52.872Z" },
- { url = "https://files.pythonhosted.org/packages/72/59/c5b8d94b14e3d3c2a9c20cb100119fd534ab5a14b93673ab4cc4a4141ea5/yarl-1.23.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d7504f2b476d21653e4d143f44a175f7f751cd41233525312696c76aa3dbb23f", size = 100502, upload-time = "2026-03-01T22:05:54.954Z" },
- { url = "https://files.pythonhosted.org/packages/77/4f/96976cb54cbfc5c9fd73ed4c51804f92f209481d1fb190981c0f8a07a1d7/yarl-1.23.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:578110dd426f0d209d1509244e6d4a3f1a3e9077655d98c5f22583d63252a08a", size = 98027, upload-time = "2026-03-01T22:05:56.409Z" },
- { url = "https://files.pythonhosted.org/packages/63/6e/904c4f476471afdbad6b7e5b70362fb5810e35cd7466529a97322b6f5556/yarl-1.23.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:609d3614d78d74ebe35f54953c5bbd2ac647a7ddb9c30a5d877580f5e86b22f2", size = 95369, upload-time = "2026-03-01T22:05:58.141Z" },
- { url = "https://files.pythonhosted.org/packages/9d/40/acfcdb3b5f9d68ef499e39e04d25e141fe90661f9d54114556cf83be8353/yarl-1.23.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4966242ec68afc74c122f8459abd597afd7d8a60dc93d695c1334c5fd25f762f", size = 105565, upload-time = "2026-03-01T22:06:00.286Z" },
- { url = "https://files.pythonhosted.org/packages/5e/c6/31e28f3a6ba2869c43d124f37ea5260cac9c9281df803c354b31f4dd1f3c/yarl-1.23.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:e0fd068364a6759bc794459f0a735ab151d11304346332489c7972bacbe9e72b", size = 99813, upload-time = "2026-03-01T22:06:01.712Z" },
- { url = "https://files.pythonhosted.org/packages/08/1f/6f65f59e72d54aa467119b63fc0b0b1762eff0232db1f4720cd89e2f4a17/yarl-1.23.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:39004f0ad156da43e86aa71f44e033de68a44e5a31fc53507b36dd253970054a", size = 105632, upload-time = "2026-03-01T22:06:03.188Z" },
- { url = "https://files.pythonhosted.org/packages/a3/c4/18b178a69935f9e7a338127d5b77d868fdc0f0e49becd286d51b3a18c61d/yarl-1.23.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e5723c01a56c5028c807c701aa66722916d2747ad737a046853f6c46f4875543", size = 101895, upload-time = "2026-03-01T22:06:04.651Z" },
- { url = "https://files.pythonhosted.org/packages/8f/54/f5b870b5505663911dba950a8e4776a0dbd51c9c54c0ae88e823e4b874a0/yarl-1.23.0-cp313-cp313-win32.whl", hash = "sha256:1b6b572edd95b4fa8df75de10b04bc81acc87c1c7d16bcdd2035b09d30acc957", size = 82356, upload-time = "2026-03-01T22:06:06.04Z" },
- { url = "https://files.pythonhosted.org/packages/7a/84/266e8da36879c6edcd37b02b547e2d9ecdfea776be49598e75696e3316e1/yarl-1.23.0-cp313-cp313-win_amd64.whl", hash = "sha256:baaf55442359053c7d62f6f8413a62adba3205119bcb6f49594894d8be47e5e3", size = 87515, upload-time = "2026-03-01T22:06:08.107Z" },
- { url = "https://files.pythonhosted.org/packages/00/fd/7e1c66efad35e1649114fa13f17485f62881ad58edeeb7f49f8c5e748bf9/yarl-1.23.0-cp313-cp313-win_arm64.whl", hash = "sha256:fb4948814a2a98e3912505f09c9e7493b1506226afb1f881825368d6fb776ee3", size = 81785, upload-time = "2026-03-01T22:06:10.181Z" },
- { url = "https://files.pythonhosted.org/packages/9c/fc/119dd07004f17ea43bb91e3ece6587759edd7519d6b086d16bfbd3319982/yarl-1.23.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:aecfed0b41aa72b7881712c65cf764e39ce2ec352324f5e0837c7048d9e6daaa", size = 130719, upload-time = "2026-03-01T22:06:11.708Z" },
- { url = "https://files.pythonhosted.org/packages/e6/0d/9f2348502fbb3af409e8f47730282cd6bc80dec6630c1e06374d882d6eb2/yarl-1.23.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a41bcf68efd19073376eb8cf948b8d9be0af26256403e512bb18f3966f1f9120", size = 89690, upload-time = "2026-03-01T22:06:13.429Z" },
- { url = "https://files.pythonhosted.org/packages/50/93/e88f3c80971b42cfc83f50a51b9d165a1dbf154b97005f2994a79f212a07/yarl-1.23.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cde9a2ecd91668bcb7f077c4966d8ceddb60af01b52e6e3e2680e4cf00ad1a59", size = 89851, upload-time = "2026-03-01T22:06:15.53Z" },
- { url = "https://files.pythonhosted.org/packages/1c/07/61c9dd8ba8f86473263b4036f70fb594c09e99c0d9737a799dfd8bc85651/yarl-1.23.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5023346c4ee7992febc0068e7593de5fa2bf611848c08404b35ebbb76b1b0512", size = 95874, upload-time = "2026-03-01T22:06:17.553Z" },
- { url = "https://files.pythonhosted.org/packages/9e/e9/f9ff8ceefba599eac6abddcfb0b3bee9b9e636e96dbf54342a8577252379/yarl-1.23.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d1009abedb49ae95b136a8904a3f71b342f849ffeced2d3747bf29caeda218c4", size = 88710, upload-time = "2026-03-01T22:06:19.004Z" },
- { url = "https://files.pythonhosted.org/packages/eb/78/0231bfcc5d4c8eec220bc2f9ef82cb4566192ea867a7c5b4148f44f6cbcd/yarl-1.23.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a8d00f29b42f534cc8aa3931cfe773b13b23e561e10d2b26f27a8d309b0e82a1", size = 101033, upload-time = "2026-03-01T22:06:21.203Z" },
- { url = "https://files.pythonhosted.org/packages/cd/9b/30ea5239a61786f18fd25797151a17fbb3be176977187a48d541b5447dd4/yarl-1.23.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:95451e6ce06c3e104556d73b559f5da6c34a069b6b62946d3ad66afcd51642ea", size = 100817, upload-time = "2026-03-01T22:06:22.738Z" },
- { url = "https://files.pythonhosted.org/packages/62/e2/a4980481071791bc83bce2b7a1a1f7adcabfa366007518b4b845e92eeee3/yarl-1.23.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:531ef597132086b6cf96faa7c6c1dcd0361dd5f1694e5cc30375907b9b7d3ea9", size = 97482, upload-time = "2026-03-01T22:06:24.21Z" },
- { url = "https://files.pythonhosted.org/packages/e5/1e/304a00cf5f6100414c4b5a01fc7ff9ee724b62158a08df2f8170dfc72a2d/yarl-1.23.0-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:88f9fb0116fbfcefcab70f85cf4b74a2b6ce5d199c41345296f49d974ddb4123", size = 95949, upload-time = "2026-03-01T22:06:25.697Z" },
- { url = "https://files.pythonhosted.org/packages/68/03/093f4055ed4cae649ac53bca3d180bd37102e9e11d048588e9ab0c0108d0/yarl-1.23.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e7b0460976dc75cb87ad9cc1f9899a4b97751e7d4e77ab840fc9b6d377b8fd24", size = 95839, upload-time = "2026-03-01T22:06:27.309Z" },
- { url = "https://files.pythonhosted.org/packages/b9/28/4c75ebb108f322aa8f917ae10a8ffa4f07cae10a8a627b64e578617df6a0/yarl-1.23.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:115136c4a426f9da976187d238e84139ff6b51a20839aa6e3720cd1026d768de", size = 90696, upload-time = "2026-03-01T22:06:29.048Z" },
- { url = "https://files.pythonhosted.org/packages/23/9c/42c2e2dd91c1a570402f51bdf066bfdb1241c2240ba001967bad778e77b7/yarl-1.23.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:ead11956716a940c1abc816b7df3fa2b84d06eaed8832ca32f5c5e058c65506b", size = 100865, upload-time = "2026-03-01T22:06:30.525Z" },
- { url = "https://files.pythonhosted.org/packages/74/05/1bcd60a8a0a914d462c305137246b6f9d167628d73568505fce3f1cb2e65/yarl-1.23.0-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:fe8f8f5e70e6dbdfca9882cd9deaac058729bcf323cf7a58660901e55c9c94f6", size = 96234, upload-time = "2026-03-01T22:06:32.692Z" },
- { url = "https://files.pythonhosted.org/packages/90/b2/f52381aac396d6778ce516b7bc149c79e65bfc068b5de2857ab69eeea3b7/yarl-1.23.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:a0e317df055958a0c1e79e5d2aa5a5eaa4a6d05a20d4b0c9c3f48918139c9fc6", size = 100295, upload-time = "2026-03-01T22:06:34.268Z" },
- { url = "https://files.pythonhosted.org/packages/e5/e8/638bae5bbf1113a659b2435d8895474598afe38b4a837103764f603aba56/yarl-1.23.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6f0fd84de0c957b2d280143522c4f91a73aada1923caee763e24a2b3fda9f8a5", size = 97784, upload-time = "2026-03-01T22:06:35.864Z" },
- { url = "https://files.pythonhosted.org/packages/80/25/a3892b46182c586c202629fc2159aa13975d3741d52ebd7347fd501d48d5/yarl-1.23.0-cp313-cp313t-win32.whl", hash = "sha256:93a784271881035ab4406a172edb0faecb6e7d00f4b53dc2f55919d6c9688595", size = 88313, upload-time = "2026-03-01T22:06:37.39Z" },
- { url = "https://files.pythonhosted.org/packages/43/68/8c5b36aa5178900b37387937bc2c2fe0e9505537f713495472dcf6f6fccc/yarl-1.23.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dd00607bffbf30250fe108065f07453ec124dbf223420f57f5e749b04295e090", size = 94932, upload-time = "2026-03-01T22:06:39.579Z" },
- { url = "https://files.pythonhosted.org/packages/c6/cc/d79ba8292f51f81f4dc533a8ccfb9fc6992cabf0998ed3245de7589dc07c/yarl-1.23.0-cp313-cp313t-win_arm64.whl", hash = "sha256:ac09d42f48f80c9ee1635b2fcaa819496a44502737660d3c0f2ade7526d29144", size = 84786, upload-time = "2026-03-01T22:06:41.988Z" },
- { url = "https://files.pythonhosted.org/packages/90/98/b85a038d65d1b92c3903ab89444f48d3cee490a883477b716d7a24b1a78c/yarl-1.23.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:21d1b7305a71a15b4794b5ff22e8eef96ff4a6d7f9657155e5aa419444b28912", size = 124455, upload-time = "2026-03-01T22:06:43.615Z" },
- { url = "https://files.pythonhosted.org/packages/39/54/bc2b45559f86543d163b6e294417a107bb87557609007c007ad889afec18/yarl-1.23.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:85610b4f27f69984932a7abbe52703688de3724d9f72bceb1cca667deff27474", size = 86752, upload-time = "2026-03-01T22:06:45.425Z" },
- { url = "https://files.pythonhosted.org/packages/24/f9/e8242b68362bffe6fb536c8db5076861466fc780f0f1b479fc4ffbebb128/yarl-1.23.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:23f371bd662cf44a7630d4d113101eafc0cfa7518a2760d20760b26021454719", size = 86291, upload-time = "2026-03-01T22:06:46.974Z" },
- { url = "https://files.pythonhosted.org/packages/ea/d8/d1cb2378c81dd729e98c716582b1ccb08357e8488e4c24714658cc6630e8/yarl-1.23.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4a80f77dc1acaaa61f0934176fccca7096d9b1ff08c8ba9cddf5ae034a24319", size = 99026, upload-time = "2026-03-01T22:06:48.459Z" },
- { url = "https://files.pythonhosted.org/packages/0a/ff/7196790538f31debe3341283b5b0707e7feb947620fc5e8236ef28d44f72/yarl-1.23.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:bd654fad46d8d9e823afbb4f87c79160b5a374ed1ff5bde24e542e6ba8f41434", size = 92355, upload-time = "2026-03-01T22:06:50.306Z" },
- { url = "https://files.pythonhosted.org/packages/c1/56/25d58c3eddde825890a5fe6aa1866228377354a3c39262235234ab5f616b/yarl-1.23.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:682bae25f0a0dd23a056739f23a134db9f52a63e2afd6bfb37ddc76292bbd723", size = 106417, upload-time = "2026-03-01T22:06:52.1Z" },
- { url = "https://files.pythonhosted.org/packages/51/8a/882c0e7bc8277eb895b31bce0138f51a1ba551fc2e1ec6753ffc1e7c1377/yarl-1.23.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a82836cab5f197a0514235aaf7ffccdc886ccdaa2324bc0aafdd4ae898103039", size = 106422, upload-time = "2026-03-01T22:06:54.424Z" },
- { url = "https://files.pythonhosted.org/packages/42/2b/fef67d616931055bf3d6764885990a3ac647d68734a2d6a9e1d13de437a2/yarl-1.23.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1c57676bdedc94cd3bc37724cf6f8cd2779f02f6aba48de45feca073e714fe52", size = 101915, upload-time = "2026-03-01T22:06:55.895Z" },
- { url = "https://files.pythonhosted.org/packages/18/6a/530e16aebce27c5937920f3431c628a29a4b6b430fab3fd1c117b26ff3f6/yarl-1.23.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c7f8dc16c498ff06497c015642333219871effba93e4a2e8604a06264aca5c5c", size = 100690, upload-time = "2026-03-01T22:06:58.21Z" },
- { url = "https://files.pythonhosted.org/packages/88/08/93749219179a45e27b036e03260fda05190b911de8e18225c294ac95bbc9/yarl-1.23.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:5ee586fb17ff8f90c91cf73c6108a434b02d69925f44f5f8e0d7f2f260607eae", size = 98750, upload-time = "2026-03-01T22:06:59.794Z" },
- { url = "https://files.pythonhosted.org/packages/d9/cf/ea424a004969f5d81a362110a6ac1496d79efdc6d50c2c4b2e3ea0fc2519/yarl-1.23.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:17235362f580149742739cc3828b80e24029d08cbb9c4bda0242c7b5bc610a8e", size = 94685, upload-time = "2026-03-01T22:07:01.375Z" },
- { url = "https://files.pythonhosted.org/packages/e2/b7/14341481fe568e2b0408bcf1484c652accafe06a0ade9387b5d3fd9df446/yarl-1.23.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:0793e2bd0cf14234983bbb371591e6bea9e876ddf6896cdcc93450996b0b5c85", size = 106009, upload-time = "2026-03-01T22:07:03.151Z" },
- { url = "https://files.pythonhosted.org/packages/0a/e6/5c744a9b54f4e8007ad35bce96fbc9218338e84812d36f3390cea616881a/yarl-1.23.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:3650dc2480f94f7116c364096bc84b1d602f44224ef7d5c7208425915c0475dd", size = 100033, upload-time = "2026-03-01T22:07:04.701Z" },
- { url = "https://files.pythonhosted.org/packages/0c/23/e3bfc188d0b400f025bc49d99793d02c9abe15752138dcc27e4eaf0c4a9e/yarl-1.23.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:f40e782d49630ad384db66d4d8b73ff4f1b8955dc12e26b09a3e3af064b3b9d6", size = 106483, upload-time = "2026-03-01T22:07:06.231Z" },
- { url = "https://files.pythonhosted.org/packages/72/42/f0505f949a90b3f8b7a363d6cbdf398f6e6c58946d85c6d3a3bc70595b26/yarl-1.23.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:94f8575fbdf81749008d980c17796097e645574a3b8c28ee313931068dad14fe", size = 102175, upload-time = "2026-03-01T22:07:08.4Z" },
- { url = "https://files.pythonhosted.org/packages/aa/65/b39290f1d892a9dd671d1c722014ca062a9c35d60885d57e5375db0404b5/yarl-1.23.0-cp314-cp314-win32.whl", hash = "sha256:c8aa34a5c864db1087d911a0b902d60d203ea3607d91f615acd3f3108ac32169", size = 83871, upload-time = "2026-03-01T22:07:09.968Z" },
- { url = "https://files.pythonhosted.org/packages/a9/5b/9b92f54c784c26e2a422e55a8d2607ab15b7ea3349e28359282f84f01d43/yarl-1.23.0-cp314-cp314-win_amd64.whl", hash = "sha256:63e92247f383c85ab00dd0091e8c3fa331a96e865459f5ee80353c70a4a42d70", size = 89093, upload-time = "2026-03-01T22:07:11.501Z" },
- { url = "https://files.pythonhosted.org/packages/e0/7d/8a84dc9381fd4412d5e7ff04926f9865f6372b4c2fd91e10092e65d29eb8/yarl-1.23.0-cp314-cp314-win_arm64.whl", hash = "sha256:70efd20be968c76ece7baa8dafe04c5be06abc57f754d6f36f3741f7aa7a208e", size = 83384, upload-time = "2026-03-01T22:07:13.069Z" },
- { url = "https://files.pythonhosted.org/packages/dd/8d/d2fad34b1c08aa161b74394183daa7d800141aaaee207317e82c790b418d/yarl-1.23.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:9a18d6f9359e45722c064c97464ec883eb0e0366d33eda61cb19a244bf222679", size = 131019, upload-time = "2026-03-01T22:07:14.903Z" },
- { url = "https://files.pythonhosted.org/packages/19/ff/33009a39d3ccf4b94d7d7880dfe17fb5816c5a4fe0096d9b56abceea9ac7/yarl-1.23.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:2803ed8b21ca47a43da80a6fd1ed3019d30061f7061daa35ac54f63933409412", size = 89894, upload-time = "2026-03-01T22:07:17.372Z" },
- { url = "https://files.pythonhosted.org/packages/0c/f1/dab7ac5e7306fb79c0190766a3c00b4cb8d09a1f390ded68c85a5934faf5/yarl-1.23.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:394906945aa8b19fc14a61cf69743a868bb8c465efe85eee687109cc540b98f4", size = 89979, upload-time = "2026-03-01T22:07:19.361Z" },
- { url = "https://files.pythonhosted.org/packages/aa/b1/08e95f3caee1fad6e65017b9f26c1d79877b502622d60e517de01e72f95d/yarl-1.23.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:71d006bee8397a4a89f469b8deb22469fe7508132d3c17fa6ed871e79832691c", size = 95943, upload-time = "2026-03-01T22:07:21.266Z" },
- { url = "https://files.pythonhosted.org/packages/c0/cc/6409f9018864a6aa186c61175b977131f373f1988e198e031236916e87e4/yarl-1.23.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:62694e275c93d54f7ccedcfef57d42761b2aad5234b6be1f3e3026cae4001cd4", size = 88786, upload-time = "2026-03-01T22:07:23.129Z" },
- { url = "https://files.pythonhosted.org/packages/76/40/cc22d1d7714b717fde2006fad2ced5efe5580606cb059ae42117542122f3/yarl-1.23.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a31de1613658308efdb21ada98cbc86a97c181aa050ba22a808120bb5be3ab94", size = 101307, upload-time = "2026-03-01T22:07:24.689Z" },
- { url = "https://files.pythonhosted.org/packages/8f/0d/476c38e85ddb4c6ec6b20b815bdd779aa386a013f3d8b85516feee55c8dc/yarl-1.23.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:fb1e8b8d66c278b21d13b0a7ca22c41dd757a7c209c6b12c313e445c31dd3b28", size = 100904, upload-time = "2026-03-01T22:07:26.287Z" },
- { url = "https://files.pythonhosted.org/packages/72/32/0abe4a76d59adf2081dcb0397168553ece4616ada1c54d1c49d8936c74f8/yarl-1.23.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50f9d8d531dfb767c565f348f33dd5139a6c43f5cbdf3f67da40d54241df93f6", size = 97728, upload-time = "2026-03-01T22:07:27.906Z" },
- { url = "https://files.pythonhosted.org/packages/b7/35/7b30f4810fba112f60f5a43237545867504e15b1c7647a785fbaf588fac2/yarl-1.23.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:575aa4405a656e61a540f4a80eaa5260f2a38fff7bfdc4b5f611840d76e9e277", size = 95964, upload-time = "2026-03-01T22:07:30.198Z" },
- { url = "https://files.pythonhosted.org/packages/2d/86/ed7a73ab85ef00e8bb70b0cb5421d8a2a625b81a333941a469a6f4022828/yarl-1.23.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:041b1a4cefacf65840b4e295c6985f334ba83c30607441ae3cf206a0eed1a2e4", size = 95882, upload-time = "2026-03-01T22:07:32.132Z" },
- { url = "https://files.pythonhosted.org/packages/19/90/d56967f61a29d8498efb7afb651e0b2b422a1e9b47b0ab5f4e40a19b699b/yarl-1.23.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:d38c1e8231722c4ce40d7593f28d92b5fc72f3e9774fe73d7e800ec32299f63a", size = 90797, upload-time = "2026-03-01T22:07:34.404Z" },
- { url = "https://files.pythonhosted.org/packages/72/00/8b8f76909259f56647adb1011d7ed8b321bcf97e464515c65016a47ecdf0/yarl-1.23.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:d53834e23c015ee83a99377db6e5e37d8484f333edb03bd15b4bc312cc7254fb", size = 101023, upload-time = "2026-03-01T22:07:35.953Z" },
- { url = "https://files.pythonhosted.org/packages/ac/e2/cab11b126fb7d440281b7df8e9ddbe4851e70a4dde47a202b6642586b8d9/yarl-1.23.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:2e27c8841126e017dd2a054a95771569e6070b9ee1b133366d8b31beb5018a41", size = 96227, upload-time = "2026-03-01T22:07:37.594Z" },
- { url = "https://files.pythonhosted.org/packages/c2/9b/2c893e16bfc50e6b2edf76c1a9eb6cb0c744346197e74c65e99ad8d634d0/yarl-1.23.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:76855800ac56f878847a09ce6dba727c93ca2d89c9e9d63002d26b916810b0a2", size = 100302, upload-time = "2026-03-01T22:07:39.334Z" },
- { url = "https://files.pythonhosted.org/packages/28/ec/5498c4e3a6d5f1003beb23405671c2eb9cdbf3067d1c80f15eeafe301010/yarl-1.23.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e09fd068c2e169a7070d83d3bde728a4d48de0549f975290be3c108c02e499b4", size = 98202, upload-time = "2026-03-01T22:07:41.717Z" },
- { url = "https://files.pythonhosted.org/packages/fe/c3/cd737e2d45e70717907f83e146f6949f20cc23cd4bf7b2688727763aa458/yarl-1.23.0-cp314-cp314t-win32.whl", hash = "sha256:73309162a6a571d4cbd3b6a1dcc703c7311843ae0d1578df6f09be4e98df38d4", size = 90558, upload-time = "2026-03-01T22:07:43.433Z" },
- { url = "https://files.pythonhosted.org/packages/e1/19/3774d162f6732d1cfb0b47b4140a942a35ca82bb19b6db1f80e9e7bdc8f8/yarl-1.23.0-cp314-cp314t-win_amd64.whl", hash = "sha256:4503053d296bc6e4cbd1fad61cf3b6e33b939886c4f249ba7c78b602214fabe2", size = 97610, upload-time = "2026-03-01T22:07:45.773Z" },
- { url = "https://files.pythonhosted.org/packages/51/47/3fa2286c3cb162c71cdb34c4224d5745a1ceceb391b2bd9b19b668a8d724/yarl-1.23.0-cp314-cp314t-win_arm64.whl", hash = "sha256:44bb7bef4ea409384e3f8bc36c063d77ea1b8d4a5b2706956c0d6695f07dcc25", size = 86041, upload-time = "2026-03-01T22:07:49.026Z" },
- { url = "https://files.pythonhosted.org/packages/69/68/c8739671f5699c7dc470580a4f821ef37c32c4cb0b047ce223a7f115757f/yarl-1.23.0-py3-none-any.whl", hash = "sha256:a2df6afe50dea8ae15fa34c9f824a3ee958d785fd5d089063d960bae1daa0a3f", size = 48288, upload-time = "2026-03-01T22:07:51.388Z" },
+sdist = { url = "https://files.pythonhosted.org/packages/57/63/0c6ebca57330cd313f6102b16dd57ffaf3ec4c83403dcb45dbd15c6f3ea1/yarl-1.22.0.tar.gz", hash = "sha256:bebf8557577d4401ba8bd9ff33906f1376c877aa78d1fe216ad01b4d6745af71", size = 187169, upload-time = "2025-10-06T14:12:55.963Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/d1/43/a2204825342f37c337f5edb6637040fa14e365b2fcc2346960201d457579/yarl-1.22.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c7bd6683587567e5a49ee6e336e0612bec8329be1b7d4c8af5687dcdeb67ee1e", size = 140517, upload-time = "2025-10-06T14:08:42.494Z" },
+ { url = "https://files.pythonhosted.org/packages/44/6f/674f3e6f02266428c56f704cd2501c22f78e8b2eeb23f153117cc86fb28a/yarl-1.22.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5cdac20da754f3a723cceea5b3448e1a2074866406adeb4ef35b469d089adb8f", size = 93495, upload-time = "2025-10-06T14:08:46.2Z" },
+ { url = "https://files.pythonhosted.org/packages/b8/12/5b274d8a0f30c07b91b2f02cba69152600b47830fcfb465c108880fcee9c/yarl-1.22.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:07a524d84df0c10f41e3ee918846e1974aba4ec017f990dc735aad487a0bdfdf", size = 94400, upload-time = "2025-10-06T14:08:47.855Z" },
+ { url = "https://files.pythonhosted.org/packages/e2/7f/df1b6949b1fa1aa9ff6de6e2631876ad4b73c4437822026e85d8acb56bb1/yarl-1.22.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e1b329cb8146d7b736677a2440e422eadd775d1806a81db2d4cded80a48efc1a", size = 347545, upload-time = "2025-10-06T14:08:49.683Z" },
+ { url = "https://files.pythonhosted.org/packages/84/09/f92ed93bd6cd77872ab6c3462df45ca45cd058d8f1d0c9b4f54c1704429f/yarl-1.22.0-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:75976c6945d85dbb9ee6308cd7ff7b1fb9409380c82d6119bd778d8fcfe2931c", size = 319598, upload-time = "2025-10-06T14:08:51.215Z" },
+ { url = "https://files.pythonhosted.org/packages/c3/97/ac3f3feae7d522cf7ccec3d340bb0b2b61c56cb9767923df62a135092c6b/yarl-1.22.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:80ddf7a5f8c86cb3eb4bc9028b07bbbf1f08a96c5c0bc1244be5e8fefcb94147", size = 363893, upload-time = "2025-10-06T14:08:53.144Z" },
+ { url = "https://files.pythonhosted.org/packages/06/49/f3219097403b9c84a4d079b1d7bda62dd9b86d0d6e4428c02d46ab2c77fc/yarl-1.22.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d332fc2e3c94dad927f2112395772a4e4fedbcf8f80efc21ed7cdfae4d574fdb", size = 371240, upload-time = "2025-10-06T14:08:55.036Z" },
+ { url = "https://files.pythonhosted.org/packages/35/9f/06b765d45c0e44e8ecf0fe15c9eacbbde342bb5b7561c46944f107bfb6c3/yarl-1.22.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0cf71bf877efeac18b38d3930594c0948c82b64547c1cf420ba48722fe5509f6", size = 346965, upload-time = "2025-10-06T14:08:56.722Z" },
+ { url = "https://files.pythonhosted.org/packages/c5/69/599e7cea8d0fcb1694323b0db0dda317fa3162f7b90166faddecf532166f/yarl-1.22.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:663e1cadaddae26be034a6ab6072449a8426ddb03d500f43daf952b74553bba0", size = 342026, upload-time = "2025-10-06T14:08:58.563Z" },
+ { url = "https://files.pythonhosted.org/packages/95/6f/9dfd12c8bc90fea9eab39832ee32ea48f8e53d1256252a77b710c065c89f/yarl-1.22.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:6dcbb0829c671f305be48a7227918cfcd11276c2d637a8033a99a02b67bf9eda", size = 335637, upload-time = "2025-10-06T14:09:00.506Z" },
+ { url = "https://files.pythonhosted.org/packages/57/2e/34c5b4eb9b07e16e873db5b182c71e5f06f9b5af388cdaa97736d79dd9a6/yarl-1.22.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:f0d97c18dfd9a9af4490631905a3f131a8e4c9e80a39353919e2cfed8f00aedc", size = 359082, upload-time = "2025-10-06T14:09:01.936Z" },
+ { url = "https://files.pythonhosted.org/packages/31/71/fa7e10fb772d273aa1f096ecb8ab8594117822f683bab7d2c5a89914c92a/yarl-1.22.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:437840083abe022c978470b942ff832c3940b2ad3734d424b7eaffcd07f76737", size = 357811, upload-time = "2025-10-06T14:09:03.445Z" },
+ { url = "https://files.pythonhosted.org/packages/26/da/11374c04e8e1184a6a03cf9c8f5688d3e5cec83ed6f31ad3481b3207f709/yarl-1.22.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a899cbd98dce6f5d8de1aad31cb712ec0a530abc0a86bd6edaa47c1090138467", size = 351223, upload-time = "2025-10-06T14:09:05.401Z" },
+ { url = "https://files.pythonhosted.org/packages/82/8f/e2d01f161b0c034a30410e375e191a5d27608c1f8693bab1a08b089ca096/yarl-1.22.0-cp310-cp310-win32.whl", hash = "sha256:595697f68bd1f0c1c159fcb97b661fc9c3f5db46498043555d04805430e79bea", size = 82118, upload-time = "2025-10-06T14:09:11.148Z" },
+ { url = "https://files.pythonhosted.org/packages/62/46/94c76196642dbeae634c7a61ba3da88cd77bed875bf6e4a8bed037505aa6/yarl-1.22.0-cp310-cp310-win_amd64.whl", hash = "sha256:cb95a9b1adaa48e41815a55ae740cfda005758104049a640a398120bf02515ca", size = 86852, upload-time = "2025-10-06T14:09:12.958Z" },
+ { url = "https://files.pythonhosted.org/packages/af/af/7df4f179d3b1a6dcb9a4bd2ffbc67642746fcafdb62580e66876ce83fff4/yarl-1.22.0-cp310-cp310-win_arm64.whl", hash = "sha256:b85b982afde6df99ecc996990d4ad7ccbdbb70e2a4ba4de0aecde5922ba98a0b", size = 82012, upload-time = "2025-10-06T14:09:14.664Z" },
+ { url = "https://files.pythonhosted.org/packages/4d/27/5ab13fc84c76a0250afd3d26d5936349a35be56ce5785447d6c423b26d92/yarl-1.22.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1ab72135b1f2db3fed3997d7e7dc1b80573c67138023852b6efb336a5eae6511", size = 141607, upload-time = "2025-10-06T14:09:16.298Z" },
+ { url = "https://files.pythonhosted.org/packages/6a/a1/d065d51d02dc02ce81501d476b9ed2229d9a990818332242a882d5d60340/yarl-1.22.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:669930400e375570189492dc8d8341301578e8493aec04aebc20d4717f899dd6", size = 94027, upload-time = "2025-10-06T14:09:17.786Z" },
+ { url = "https://files.pythonhosted.org/packages/c1/da/8da9f6a53f67b5106ffe902c6fa0164e10398d4e150d85838b82f424072a/yarl-1.22.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:792a2af6d58177ef7c19cbf0097aba92ca1b9cb3ffdd9c7470e156c8f9b5e028", size = 94963, upload-time = "2025-10-06T14:09:19.662Z" },
+ { url = "https://files.pythonhosted.org/packages/68/fe/2c1f674960c376e29cb0bec1249b117d11738db92a6ccc4a530b972648db/yarl-1.22.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ea66b1c11c9150f1372f69afb6b8116f2dd7286f38e14ea71a44eee9ec51b9d", size = 368406, upload-time = "2025-10-06T14:09:21.402Z" },
+ { url = "https://files.pythonhosted.org/packages/95/26/812a540e1c3c6418fec60e9bbd38e871eaba9545e94fa5eff8f4a8e28e1e/yarl-1.22.0-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3e2daa88dc91870215961e96a039ec73e4937da13cf77ce17f9cad0c18df3503", size = 336581, upload-time = "2025-10-06T14:09:22.98Z" },
+ { url = "https://files.pythonhosted.org/packages/0b/f5/5777b19e26fdf98563985e481f8be3d8a39f8734147a6ebf459d0dab5a6b/yarl-1.22.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ba440ae430c00eee41509353628600212112cd5018d5def7e9b05ea7ac34eb65", size = 388924, upload-time = "2025-10-06T14:09:24.655Z" },
+ { url = "https://files.pythonhosted.org/packages/86/08/24bd2477bd59c0bbd994fe1d93b126e0472e4e3df5a96a277b0a55309e89/yarl-1.22.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e6438cc8f23a9c1478633d216b16104a586b9761db62bfacb6425bac0a36679e", size = 392890, upload-time = "2025-10-06T14:09:26.617Z" },
+ { url = "https://files.pythonhosted.org/packages/46/00/71b90ed48e895667ecfb1eaab27c1523ee2fa217433ed77a73b13205ca4b/yarl-1.22.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c52a6e78aef5cf47a98ef8e934755abf53953379b7d53e68b15ff4420e6683d", size = 365819, upload-time = "2025-10-06T14:09:28.544Z" },
+ { url = "https://files.pythonhosted.org/packages/30/2d/f715501cae832651d3282387c6a9236cd26bd00d0ff1e404b3dc52447884/yarl-1.22.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3b06bcadaac49c70f4c88af4ffcfbe3dc155aab3163e75777818092478bcbbe7", size = 363601, upload-time = "2025-10-06T14:09:30.568Z" },
+ { url = "https://files.pythonhosted.org/packages/f8/f9/a678c992d78e394e7126ee0b0e4e71bd2775e4334d00a9278c06a6cce96a/yarl-1.22.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:6944b2dc72c4d7f7052683487e3677456050ff77fcf5e6204e98caf785ad1967", size = 358072, upload-time = "2025-10-06T14:09:32.528Z" },
+ { url = "https://files.pythonhosted.org/packages/2c/d1/b49454411a60edb6fefdcad4f8e6dbba7d8019e3a508a1c5836cba6d0781/yarl-1.22.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:d5372ca1df0f91a86b047d1277c2aaf1edb32d78bbcefffc81b40ffd18f027ed", size = 385311, upload-time = "2025-10-06T14:09:34.634Z" },
+ { url = "https://files.pythonhosted.org/packages/87/e5/40d7a94debb8448c7771a916d1861d6609dddf7958dc381117e7ba36d9e8/yarl-1.22.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:51af598701f5299012b8416486b40fceef8c26fc87dc6d7d1f6fc30609ea0aa6", size = 381094, upload-time = "2025-10-06T14:09:36.268Z" },
+ { url = "https://files.pythonhosted.org/packages/35/d8/611cc282502381ad855448643e1ad0538957fc82ae83dfe7762c14069e14/yarl-1.22.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b266bd01fedeffeeac01a79ae181719ff848a5a13ce10075adbefc8f1daee70e", size = 370944, upload-time = "2025-10-06T14:09:37.872Z" },
+ { url = "https://files.pythonhosted.org/packages/2d/df/fadd00fb1c90e1a5a8bd731fa3d3de2e165e5a3666a095b04e31b04d9cb6/yarl-1.22.0-cp311-cp311-win32.whl", hash = "sha256:a9b1ba5610a4e20f655258d5a1fdc7ebe3d837bb0e45b581398b99eb98b1f5ca", size = 81804, upload-time = "2025-10-06T14:09:39.359Z" },
+ { url = "https://files.pythonhosted.org/packages/b5/f7/149bb6f45f267cb5c074ac40c01c6b3ea6d8a620d34b337f6321928a1b4d/yarl-1.22.0-cp311-cp311-win_amd64.whl", hash = "sha256:078278b9b0b11568937d9509b589ee83ef98ed6d561dfe2020e24a9fd08eaa2b", size = 86858, upload-time = "2025-10-06T14:09:41.068Z" },
+ { url = "https://files.pythonhosted.org/packages/2b/13/88b78b93ad3f2f0b78e13bfaaa24d11cbc746e93fe76d8c06bf139615646/yarl-1.22.0-cp311-cp311-win_arm64.whl", hash = "sha256:b6a6f620cfe13ccec221fa312139135166e47ae169f8253f72a0abc0dae94376", size = 81637, upload-time = "2025-10-06T14:09:42.712Z" },
+ { url = "https://files.pythonhosted.org/packages/75/ff/46736024fee3429b80a165a732e38e5d5a238721e634ab41b040d49f8738/yarl-1.22.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e340382d1afa5d32b892b3ff062436d592ec3d692aeea3bef3a5cfe11bbf8c6f", size = 142000, upload-time = "2025-10-06T14:09:44.631Z" },
+ { url = "https://files.pythonhosted.org/packages/5a/9a/b312ed670df903145598914770eb12de1bac44599549b3360acc96878df8/yarl-1.22.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f1e09112a2c31ffe8d80be1b0988fa6a18c5d5cad92a9ffbb1c04c91bfe52ad2", size = 94338, upload-time = "2025-10-06T14:09:46.372Z" },
+ { url = "https://files.pythonhosted.org/packages/ba/f5/0601483296f09c3c65e303d60c070a5c19fcdbc72daa061e96170785bc7d/yarl-1.22.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:939fe60db294c786f6b7c2d2e121576628468f65453d86b0fe36cb52f987bd74", size = 94909, upload-time = "2025-10-06T14:09:48.648Z" },
+ { url = "https://files.pythonhosted.org/packages/60/41/9a1fe0b73dbcefce72e46cf149b0e0a67612d60bfc90fb59c2b2efdfbd86/yarl-1.22.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e1651bf8e0398574646744c1885a41198eba53dc8a9312b954073f845c90a8df", size = 372940, upload-time = "2025-10-06T14:09:50.089Z" },
+ { url = "https://files.pythonhosted.org/packages/17/7a/795cb6dfee561961c30b800f0ed616b923a2ec6258b5def2a00bf8231334/yarl-1.22.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b8a0588521a26bf92a57a1705b77b8b59044cdceccac7151bd8d229e66b8dedb", size = 345825, upload-time = "2025-10-06T14:09:52.142Z" },
+ { url = "https://files.pythonhosted.org/packages/d7/93/a58f4d596d2be2ae7bab1a5846c4d270b894958845753b2c606d666744d3/yarl-1.22.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:42188e6a615c1a75bcaa6e150c3fe8f3e8680471a6b10150c5f7e83f47cc34d2", size = 386705, upload-time = "2025-10-06T14:09:54.128Z" },
+ { url = "https://files.pythonhosted.org/packages/61/92/682279d0e099d0e14d7fd2e176bd04f48de1484f56546a3e1313cd6c8e7c/yarl-1.22.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f6d2cb59377d99718913ad9a151030d6f83ef420a2b8f521d94609ecc106ee82", size = 396518, upload-time = "2025-10-06T14:09:55.762Z" },
+ { url = "https://files.pythonhosted.org/packages/db/0f/0d52c98b8a885aeda831224b78f3be7ec2e1aa4a62091f9f9188c3c65b56/yarl-1.22.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50678a3b71c751d58d7908edc96d332af328839eea883bb554a43f539101277a", size = 377267, upload-time = "2025-10-06T14:09:57.958Z" },
+ { url = "https://files.pythonhosted.org/packages/22/42/d2685e35908cbeaa6532c1fc73e89e7f2efb5d8a7df3959ea8e37177c5a3/yarl-1.22.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1e8fbaa7cec507aa24ea27a01456e8dd4b6fab829059b69844bd348f2d467124", size = 365797, upload-time = "2025-10-06T14:09:59.527Z" },
+ { url = "https://files.pythonhosted.org/packages/a2/83/cf8c7bcc6355631762f7d8bdab920ad09b82efa6b722999dfb05afa6cfac/yarl-1.22.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:433885ab5431bc3d3d4f2f9bd15bfa1614c522b0f1405d62c4f926ccd69d04fa", size = 365535, upload-time = "2025-10-06T14:10:01.139Z" },
+ { url = "https://files.pythonhosted.org/packages/25/e1/5302ff9b28f0c59cac913b91fe3f16c59a033887e57ce9ca5d41a3a94737/yarl-1.22.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:b790b39c7e9a4192dc2e201a282109ed2985a1ddbd5ac08dc56d0e121400a8f7", size = 382324, upload-time = "2025-10-06T14:10:02.756Z" },
+ { url = "https://files.pythonhosted.org/packages/bf/cd/4617eb60f032f19ae3a688dc990d8f0d89ee0ea378b61cac81ede3e52fae/yarl-1.22.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:31f0b53913220599446872d757257be5898019c85e7971599065bc55065dc99d", size = 383803, upload-time = "2025-10-06T14:10:04.552Z" },
+ { url = "https://files.pythonhosted.org/packages/59/65/afc6e62bb506a319ea67b694551dab4a7e6fb7bf604e9bd9f3e11d575fec/yarl-1.22.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a49370e8f711daec68d09b821a34e1167792ee2d24d405cbc2387be4f158b520", size = 374220, upload-time = "2025-10-06T14:10:06.489Z" },
+ { url = "https://files.pythonhosted.org/packages/e7/3d/68bf18d50dc674b942daec86a9ba922d3113d8399b0e52b9897530442da2/yarl-1.22.0-cp312-cp312-win32.whl", hash = "sha256:70dfd4f241c04bd9239d53b17f11e6ab672b9f1420364af63e8531198e3f5fe8", size = 81589, upload-time = "2025-10-06T14:10:09.254Z" },
+ { url = "https://files.pythonhosted.org/packages/c8/9a/6ad1a9b37c2f72874f93e691b2e7ecb6137fb2b899983125db4204e47575/yarl-1.22.0-cp312-cp312-win_amd64.whl", hash = "sha256:8884d8b332a5e9b88e23f60bb166890009429391864c685e17bd73a9eda9105c", size = 87213, upload-time = "2025-10-06T14:10:11.369Z" },
+ { url = "https://files.pythonhosted.org/packages/44/c5/c21b562d1680a77634d748e30c653c3ca918beb35555cff24986fff54598/yarl-1.22.0-cp312-cp312-win_arm64.whl", hash = "sha256:ea70f61a47f3cc93bdf8b2f368ed359ef02a01ca6393916bc8ff877427181e74", size = 81330, upload-time = "2025-10-06T14:10:13.112Z" },
+ { url = "https://files.pythonhosted.org/packages/ea/f3/d67de7260456ee105dc1d162d43a019ecad6b91e2f51809d6cddaa56690e/yarl-1.22.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8dee9c25c74997f6a750cd317b8ca63545169c098faee42c84aa5e506c819b53", size = 139980, upload-time = "2025-10-06T14:10:14.601Z" },
+ { url = "https://files.pythonhosted.org/packages/01/88/04d98af0b47e0ef42597b9b28863b9060bb515524da0a65d5f4db160b2d5/yarl-1.22.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:01e73b85a5434f89fc4fe27dcda2aff08ddf35e4d47bbbea3bdcd25321af538a", size = 93424, upload-time = "2025-10-06T14:10:16.115Z" },
+ { url = "https://files.pythonhosted.org/packages/18/91/3274b215fd8442a03975ce6bee5fe6aa57a8326b29b9d3d56234a1dca244/yarl-1.22.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:22965c2af250d20c873cdbee8ff958fb809940aeb2e74ba5f20aaf6b7ac8c70c", size = 93821, upload-time = "2025-10-06T14:10:17.993Z" },
+ { url = "https://files.pythonhosted.org/packages/61/3a/caf4e25036db0f2da4ca22a353dfeb3c9d3c95d2761ebe9b14df8fc16eb0/yarl-1.22.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4f15793aa49793ec8d1c708ab7f9eded1aa72edc5174cae703651555ed1b601", size = 373243, upload-time = "2025-10-06T14:10:19.44Z" },
+ { url = "https://files.pythonhosted.org/packages/6e/9e/51a77ac7516e8e7803b06e01f74e78649c24ee1021eca3d6a739cb6ea49c/yarl-1.22.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5542339dcf2747135c5c85f68680353d5cb9ffd741c0f2e8d832d054d41f35a", size = 342361, upload-time = "2025-10-06T14:10:21.124Z" },
+ { url = "https://files.pythonhosted.org/packages/d4/f8/33b92454789dde8407f156c00303e9a891f1f51a0330b0fad7c909f87692/yarl-1.22.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5c401e05ad47a75869c3ab3e35137f8468b846770587e70d71e11de797d113df", size = 387036, upload-time = "2025-10-06T14:10:22.902Z" },
+ { url = "https://files.pythonhosted.org/packages/d9/9a/c5db84ea024f76838220280f732970aa4ee154015d7f5c1bfb60a267af6f/yarl-1.22.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:243dda95d901c733f5b59214d28b0120893d91777cb8aa043e6ef059d3cddfe2", size = 397671, upload-time = "2025-10-06T14:10:24.523Z" },
+ { url = "https://files.pythonhosted.org/packages/11/c9/cd8538dc2e7727095e0c1d867bad1e40c98f37763e6d995c1939f5fdc7b1/yarl-1.22.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bec03d0d388060058f5d291a813f21c011041938a441c593374da6077fe21b1b", size = 377059, upload-time = "2025-10-06T14:10:26.406Z" },
+ { url = "https://files.pythonhosted.org/packages/a1/b9/ab437b261702ced75122ed78a876a6dec0a1b0f5e17a4ac7a9a2482d8abe/yarl-1.22.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b0748275abb8c1e1e09301ee3cf90c8a99678a4e92e4373705f2a2570d581273", size = 365356, upload-time = "2025-10-06T14:10:28.461Z" },
+ { url = "https://files.pythonhosted.org/packages/b2/9d/8e1ae6d1d008a9567877b08f0ce4077a29974c04c062dabdb923ed98e6fe/yarl-1.22.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:47fdb18187e2a4e18fda2c25c05d8251a9e4a521edaed757fef033e7d8498d9a", size = 361331, upload-time = "2025-10-06T14:10:30.541Z" },
+ { url = "https://files.pythonhosted.org/packages/ca/5a/09b7be3905962f145b73beb468cdd53db8aa171cf18c80400a54c5b82846/yarl-1.22.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c7044802eec4524fde550afc28edda0dd5784c4c45f0be151a2d3ba017daca7d", size = 382590, upload-time = "2025-10-06T14:10:33.352Z" },
+ { url = "https://files.pythonhosted.org/packages/aa/7f/59ec509abf90eda5048b0bc3e2d7b5099dffdb3e6b127019895ab9d5ef44/yarl-1.22.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:139718f35149ff544caba20fce6e8a2f71f1e39b92c700d8438a0b1d2a631a02", size = 385316, upload-time = "2025-10-06T14:10:35.034Z" },
+ { url = "https://files.pythonhosted.org/packages/e5/84/891158426bc8036bfdfd862fabd0e0fa25df4176ec793e447f4b85cf1be4/yarl-1.22.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e1b51bebd221006d3d2f95fbe124b22b247136647ae5dcc8c7acafba66e5ee67", size = 374431, upload-time = "2025-10-06T14:10:37.76Z" },
+ { url = "https://files.pythonhosted.org/packages/bb/49/03da1580665baa8bef5e8ed34c6df2c2aca0a2f28bf397ed238cc1bbc6f2/yarl-1.22.0-cp313-cp313-win32.whl", hash = "sha256:d3e32536234a95f513bd374e93d717cf6b2231a791758de6c509e3653f234c95", size = 81555, upload-time = "2025-10-06T14:10:39.649Z" },
+ { url = "https://files.pythonhosted.org/packages/9a/ee/450914ae11b419eadd067c6183ae08381cfdfcb9798b90b2b713bbebddda/yarl-1.22.0-cp313-cp313-win_amd64.whl", hash = "sha256:47743b82b76d89a1d20b83e60d5c20314cbd5ba2befc9cda8f28300c4a08ed4d", size = 86965, upload-time = "2025-10-06T14:10:41.313Z" },
+ { url = "https://files.pythonhosted.org/packages/98/4d/264a01eae03b6cf629ad69bae94e3b0e5344741e929073678e84bf7a3e3b/yarl-1.22.0-cp313-cp313-win_arm64.whl", hash = "sha256:5d0fcda9608875f7d052eff120c7a5da474a6796fe4d83e152e0e4d42f6d1a9b", size = 81205, upload-time = "2025-10-06T14:10:43.167Z" },
+ { url = "https://files.pythonhosted.org/packages/88/fc/6908f062a2f77b5f9f6d69cecb1747260831ff206adcbc5b510aff88df91/yarl-1.22.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:719ae08b6972befcba4310e49edb1161a88cdd331e3a694b84466bd938a6ab10", size = 146209, upload-time = "2025-10-06T14:10:44.643Z" },
+ { url = "https://files.pythonhosted.org/packages/65/47/76594ae8eab26210b4867be6f49129861ad33da1f1ebdf7051e98492bf62/yarl-1.22.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:47d8a5c446df1c4db9d21b49619ffdba90e77c89ec6e283f453856c74b50b9e3", size = 95966, upload-time = "2025-10-06T14:10:46.554Z" },
+ { url = "https://files.pythonhosted.org/packages/ab/ce/05e9828a49271ba6b5b038b15b3934e996980dd78abdfeb52a04cfb9467e/yarl-1.22.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cfebc0ac8333520d2d0423cbbe43ae43c8838862ddb898f5ca68565e395516e9", size = 97312, upload-time = "2025-10-06T14:10:48.007Z" },
+ { url = "https://files.pythonhosted.org/packages/d1/c5/7dffad5e4f2265b29c9d7ec869c369e4223166e4f9206fc2243ee9eea727/yarl-1.22.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4398557cbf484207df000309235979c79c4356518fd5c99158c7d38203c4da4f", size = 361967, upload-time = "2025-10-06T14:10:49.997Z" },
+ { url = "https://files.pythonhosted.org/packages/50/b2/375b933c93a54bff7fc041e1a6ad2c0f6f733ffb0c6e642ce56ee3b39970/yarl-1.22.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:2ca6fd72a8cd803be290d42f2dec5cdcd5299eeb93c2d929bf060ad9efaf5de0", size = 323949, upload-time = "2025-10-06T14:10:52.004Z" },
+ { url = "https://files.pythonhosted.org/packages/66/50/bfc2a29a1d78644c5a7220ce2f304f38248dc94124a326794e677634b6cf/yarl-1.22.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ca1f59c4e1ab6e72f0a23c13fca5430f889634166be85dbf1013683e49e3278e", size = 361818, upload-time = "2025-10-06T14:10:54.078Z" },
+ { url = "https://files.pythonhosted.org/packages/46/96/f3941a46af7d5d0f0498f86d71275696800ddcdd20426298e572b19b91ff/yarl-1.22.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6c5010a52015e7c70f86eb967db0f37f3c8bd503a695a49f8d45700144667708", size = 372626, upload-time = "2025-10-06T14:10:55.767Z" },
+ { url = "https://files.pythonhosted.org/packages/c1/42/8b27c83bb875cd89448e42cd627e0fb971fa1675c9ec546393d18826cb50/yarl-1.22.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d7672ecf7557476642c88497c2f8d8542f8e36596e928e9bcba0e42e1e7d71f", size = 341129, upload-time = "2025-10-06T14:10:57.985Z" },
+ { url = "https://files.pythonhosted.org/packages/49/36/99ca3122201b382a3cf7cc937b95235b0ac944f7e9f2d5331d50821ed352/yarl-1.22.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:3b7c88eeef021579d600e50363e0b6ee4f7f6f728cd3486b9d0f3ee7b946398d", size = 346776, upload-time = "2025-10-06T14:10:59.633Z" },
+ { url = "https://files.pythonhosted.org/packages/85/b4/47328bf996acd01a4c16ef9dcd2f59c969f495073616586f78cd5f2efb99/yarl-1.22.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:f4afb5c34f2c6fecdcc182dfcfc6af6cccf1aa923eed4d6a12e9d96904e1a0d8", size = 334879, upload-time = "2025-10-06T14:11:01.454Z" },
+ { url = "https://files.pythonhosted.org/packages/c2/ad/b77d7b3f14a4283bffb8e92c6026496f6de49751c2f97d4352242bba3990/yarl-1.22.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:59c189e3e99a59cf8d83cbb31d4db02d66cda5a1a4374e8a012b51255341abf5", size = 350996, upload-time = "2025-10-06T14:11:03.452Z" },
+ { url = "https://files.pythonhosted.org/packages/81/c8/06e1d69295792ba54d556f06686cbd6a7ce39c22307100e3fb4a2c0b0a1d/yarl-1.22.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:5a3bf7f62a289fa90f1990422dc8dff5a458469ea71d1624585ec3a4c8d6960f", size = 356047, upload-time = "2025-10-06T14:11:05.115Z" },
+ { url = "https://files.pythonhosted.org/packages/4b/b8/4c0e9e9f597074b208d18cef227d83aac36184bfbc6eab204ea55783dbc5/yarl-1.22.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:de6b9a04c606978fdfe72666fa216ffcf2d1a9f6a381058d4378f8d7b1e5de62", size = 342947, upload-time = "2025-10-06T14:11:08.137Z" },
+ { url = "https://files.pythonhosted.org/packages/e0/e5/11f140a58bf4c6ad7aca69a892bff0ee638c31bea4206748fc0df4ebcb3a/yarl-1.22.0-cp313-cp313t-win32.whl", hash = "sha256:1834bb90991cc2999f10f97f5f01317f99b143284766d197e43cd5b45eb18d03", size = 86943, upload-time = "2025-10-06T14:11:10.284Z" },
+ { url = "https://files.pythonhosted.org/packages/31/74/8b74bae38ed7fe6793d0c15a0c8207bbb819cf287788459e5ed230996cdd/yarl-1.22.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ff86011bd159a9d2dfc89c34cfd8aff12875980e3bd6a39ff097887520e60249", size = 93715, upload-time = "2025-10-06T14:11:11.739Z" },
+ { url = "https://files.pythonhosted.org/packages/69/66/991858aa4b5892d57aef7ee1ba6b4d01ec3b7eb3060795d34090a3ca3278/yarl-1.22.0-cp313-cp313t-win_arm64.whl", hash = "sha256:7861058d0582b847bc4e3a4a4c46828a410bca738673f35a29ba3ca5db0b473b", size = 83857, upload-time = "2025-10-06T14:11:13.586Z" },
+ { url = "https://files.pythonhosted.org/packages/46/b3/e20ef504049f1a1c54a814b4b9bed96d1ac0e0610c3b4da178f87209db05/yarl-1.22.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:34b36c2c57124530884d89d50ed2c1478697ad7473efd59cfd479945c95650e4", size = 140520, upload-time = "2025-10-06T14:11:15.465Z" },
+ { url = "https://files.pythonhosted.org/packages/e4/04/3532d990fdbab02e5ede063676b5c4260e7f3abea2151099c2aa745acc4c/yarl-1.22.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:0dd9a702591ca2e543631c2a017e4a547e38a5c0f29eece37d9097e04a7ac683", size = 93504, upload-time = "2025-10-06T14:11:17.106Z" },
+ { url = "https://files.pythonhosted.org/packages/11/63/ff458113c5c2dac9a9719ac68ee7c947cb621432bcf28c9972b1c0e83938/yarl-1.22.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:594fcab1032e2d2cc3321bb2e51271e7cd2b516c7d9aee780ece81b07ff8244b", size = 94282, upload-time = "2025-10-06T14:11:19.064Z" },
+ { url = "https://files.pythonhosted.org/packages/a7/bc/315a56aca762d44a6aaaf7ad253f04d996cb6b27bad34410f82d76ea8038/yarl-1.22.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f3d7a87a78d46a2e3d5b72587ac14b4c16952dd0887dbb051451eceac774411e", size = 372080, upload-time = "2025-10-06T14:11:20.996Z" },
+ { url = "https://files.pythonhosted.org/packages/3f/3f/08e9b826ec2e099ea6e7c69a61272f4f6da62cb5b1b63590bb80ca2e4a40/yarl-1.22.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:852863707010316c973162e703bddabec35e8757e67fcb8ad58829de1ebc8590", size = 338696, upload-time = "2025-10-06T14:11:22.847Z" },
+ { url = "https://files.pythonhosted.org/packages/e3/9f/90360108e3b32bd76789088e99538febfea24a102380ae73827f62073543/yarl-1.22.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:131a085a53bfe839a477c0845acf21efc77457ba2bcf5899618136d64f3303a2", size = 387121, upload-time = "2025-10-06T14:11:24.889Z" },
+ { url = "https://files.pythonhosted.org/packages/98/92/ab8d4657bd5b46a38094cfaea498f18bb70ce6b63508fd7e909bd1f93066/yarl-1.22.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:078a8aefd263f4d4f923a9677b942b445a2be970ca24548a8102689a3a8ab8da", size = 394080, upload-time = "2025-10-06T14:11:27.307Z" },
+ { url = "https://files.pythonhosted.org/packages/f5/e7/d8c5a7752fef68205296201f8ec2bf718f5c805a7a7e9880576c67600658/yarl-1.22.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bca03b91c323036913993ff5c738d0842fc9c60c4648e5c8d98331526df89784", size = 372661, upload-time = "2025-10-06T14:11:29.387Z" },
+ { url = "https://files.pythonhosted.org/packages/b6/2e/f4d26183c8db0bb82d491b072f3127fb8c381a6206a3a56332714b79b751/yarl-1.22.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:68986a61557d37bb90d3051a45b91fa3d5c516d177dfc6dd6f2f436a07ff2b6b", size = 364645, upload-time = "2025-10-06T14:11:31.423Z" },
+ { url = "https://files.pythonhosted.org/packages/80/7c/428e5812e6b87cd00ee8e898328a62c95825bf37c7fa87f0b6bb2ad31304/yarl-1.22.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:4792b262d585ff0dff6bcb787f8492e40698443ec982a3568c2096433660c694", size = 355361, upload-time = "2025-10-06T14:11:33.055Z" },
+ { url = "https://files.pythonhosted.org/packages/ec/2a/249405fd26776f8b13c067378ef4d7dd49c9098d1b6457cdd152a99e96a9/yarl-1.22.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:ebd4549b108d732dba1d4ace67614b9545b21ece30937a63a65dd34efa19732d", size = 381451, upload-time = "2025-10-06T14:11:35.136Z" },
+ { url = "https://files.pythonhosted.org/packages/67/a8/fb6b1adbe98cf1e2dd9fad71003d3a63a1bc22459c6e15f5714eb9323b93/yarl-1.22.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:f87ac53513d22240c7d59203f25cc3beac1e574c6cd681bbfd321987b69f95fd", size = 383814, upload-time = "2025-10-06T14:11:37.094Z" },
+ { url = "https://files.pythonhosted.org/packages/d9/f9/3aa2c0e480fb73e872ae2814c43bc1e734740bb0d54e8cb2a95925f98131/yarl-1.22.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:22b029f2881599e2f1b06f8f1db2ee63bd309e2293ba2d566e008ba12778b8da", size = 370799, upload-time = "2025-10-06T14:11:38.83Z" },
+ { url = "https://files.pythonhosted.org/packages/50/3c/af9dba3b8b5eeb302f36f16f92791f3ea62e3f47763406abf6d5a4a3333b/yarl-1.22.0-cp314-cp314-win32.whl", hash = "sha256:6a635ea45ba4ea8238463b4f7d0e721bad669f80878b7bfd1f89266e2ae63da2", size = 82990, upload-time = "2025-10-06T14:11:40.624Z" },
+ { url = "https://files.pythonhosted.org/packages/ac/30/ac3a0c5bdc1d6efd1b41fa24d4897a4329b3b1e98de9449679dd327af4f0/yarl-1.22.0-cp314-cp314-win_amd64.whl", hash = "sha256:0d6e6885777af0f110b0e5d7e5dda8b704efed3894da26220b7f3d887b839a79", size = 88292, upload-time = "2025-10-06T14:11:42.578Z" },
+ { url = "https://files.pythonhosted.org/packages/df/0a/227ab4ff5b998a1b7410abc7b46c9b7a26b0ca9e86c34ba4b8d8bc7c63d5/yarl-1.22.0-cp314-cp314-win_arm64.whl", hash = "sha256:8218f4e98d3c10d683584cb40f0424f4b9fd6e95610232dd75e13743b070ee33", size = 82888, upload-time = "2025-10-06T14:11:44.863Z" },
+ { url = "https://files.pythonhosted.org/packages/06/5e/a15eb13db90abd87dfbefb9760c0f3f257ac42a5cac7e75dbc23bed97a9f/yarl-1.22.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:45c2842ff0e0d1b35a6bf1cd6c690939dacb617a70827f715232b2e0494d55d1", size = 146223, upload-time = "2025-10-06T14:11:46.796Z" },
+ { url = "https://files.pythonhosted.org/packages/18/82/9665c61910d4d84f41a5bf6837597c89e665fa88aa4941080704645932a9/yarl-1.22.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:d947071e6ebcf2e2bee8fce76e10faca8f7a14808ca36a910263acaacef08eca", size = 95981, upload-time = "2025-10-06T14:11:48.845Z" },
+ { url = "https://files.pythonhosted.org/packages/5d/9a/2f65743589809af4d0a6d3aa749343c4b5f4c380cc24a8e94a3c6625a808/yarl-1.22.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:334b8721303e61b00019474cc103bdac3d7b1f65e91f0bfedeec2d56dfe74b53", size = 97303, upload-time = "2025-10-06T14:11:50.897Z" },
+ { url = "https://files.pythonhosted.org/packages/b0/ab/5b13d3e157505c43c3b43b5a776cbf7b24a02bc4cccc40314771197e3508/yarl-1.22.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1e7ce67c34138a058fd092f67d07a72b8e31ff0c9236e751957465a24b28910c", size = 361820, upload-time = "2025-10-06T14:11:52.549Z" },
+ { url = "https://files.pythonhosted.org/packages/fb/76/242a5ef4677615cf95330cfc1b4610e78184400699bdda0acb897ef5e49a/yarl-1.22.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d77e1b2c6d04711478cb1c4ab90db07f1609ccf06a287d5607fcd90dc9863acf", size = 323203, upload-time = "2025-10-06T14:11:54.225Z" },
+ { url = "https://files.pythonhosted.org/packages/8c/96/475509110d3f0153b43d06164cf4195c64d16999e0c7e2d8a099adcd6907/yarl-1.22.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c4647674b6150d2cae088fc07de2738a84b8bcedebef29802cf0b0a82ab6face", size = 363173, upload-time = "2025-10-06T14:11:56.069Z" },
+ { url = "https://files.pythonhosted.org/packages/c9/66/59db471aecfbd559a1fd48aedd954435558cd98c7d0da8b03cc6c140a32c/yarl-1.22.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:efb07073be061c8f79d03d04139a80ba33cbd390ca8f0297aae9cce6411e4c6b", size = 373562, upload-time = "2025-10-06T14:11:58.783Z" },
+ { url = "https://files.pythonhosted.org/packages/03/1f/c5d94abc91557384719da10ff166b916107c1b45e4d0423a88457071dd88/yarl-1.22.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e51ac5435758ba97ad69617e13233da53908beccc6cfcd6c34bbed8dcbede486", size = 339828, upload-time = "2025-10-06T14:12:00.686Z" },
+ { url = "https://files.pythonhosted.org/packages/5f/97/aa6a143d3afba17b6465733681c70cf175af89f76ec8d9286e08437a7454/yarl-1.22.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:33e32a0dd0c8205efa8e83d04fc9f19313772b78522d1bdc7d9aed706bfd6138", size = 347551, upload-time = "2025-10-06T14:12:02.628Z" },
+ { url = "https://files.pythonhosted.org/packages/43/3c/45a2b6d80195959239a7b2a8810506d4eea5487dce61c2a3393e7fc3c52e/yarl-1.22.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:bf4a21e58b9cde0e401e683ebd00f6ed30a06d14e93f7c8fd059f8b6e8f87b6a", size = 334512, upload-time = "2025-10-06T14:12:04.871Z" },
+ { url = "https://files.pythonhosted.org/packages/86/a0/c2ab48d74599c7c84cb104ebd799c5813de252bea0f360ffc29d270c2caa/yarl-1.22.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:e4b582bab49ac33c8deb97e058cd67c2c50dac0dd134874106d9c774fd272529", size = 352400, upload-time = "2025-10-06T14:12:06.624Z" },
+ { url = "https://files.pythonhosted.org/packages/32/75/f8919b2eafc929567d3d8411f72bdb1a2109c01caaab4ebfa5f8ffadc15b/yarl-1.22.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:0b5bcc1a9c4839e7e30b7b30dd47fe5e7e44fb7054ec29b5bb8d526aa1041093", size = 357140, upload-time = "2025-10-06T14:12:08.362Z" },
+ { url = "https://files.pythonhosted.org/packages/cf/72/6a85bba382f22cf78add705d8c3731748397d986e197e53ecc7835e76de7/yarl-1.22.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c0232bce2170103ec23c454e54a57008a9a72b5d1c3105dc2496750da8cfa47c", size = 341473, upload-time = "2025-10-06T14:12:10.994Z" },
+ { url = "https://files.pythonhosted.org/packages/35/18/55e6011f7c044dc80b98893060773cefcfdbf60dfefb8cb2f58b9bacbd83/yarl-1.22.0-cp314-cp314t-win32.whl", hash = "sha256:8009b3173bcd637be650922ac455946197d858b3630b6d8787aa9e5c4564533e", size = 89056, upload-time = "2025-10-06T14:12:13.317Z" },
+ { url = "https://files.pythonhosted.org/packages/f9/86/0f0dccb6e59a9e7f122c5afd43568b1d31b8ab7dda5f1b01fb5c7025c9a9/yarl-1.22.0-cp314-cp314t-win_amd64.whl", hash = "sha256:9fb17ea16e972c63d25d4a97f016d235c78dd2344820eb35bc034bc32012ee27", size = 96292, upload-time = "2025-10-06T14:12:15.398Z" },
+ { url = "https://files.pythonhosted.org/packages/48/b7/503c98092fb3b344a179579f55814b613c1fbb1c23b3ec14a7b008a66a6e/yarl-1.22.0-cp314-cp314t-win_arm64.whl", hash = "sha256:9f6d73c1436b934e3f01df1e1b21ff765cd1d28c77dfb9ace207f746d4610ee1", size = 85171, upload-time = "2025-10-06T14:12:16.935Z" },
+ { url = "https://files.pythonhosted.org/packages/73/ae/b48f95715333080afb75a4504487cbe142cae1268afc482d06692d605ae6/yarl-1.22.0-py3-none-any.whl", hash = "sha256:1380560bdba02b6b6c90de54133c81c9f2a453dee9912fe58c1dcced1edb7cff", size = 46814, upload-time = "2025-10-06T14:12:53.872Z" },
]
[[package]]